zsh/fusevm_bridge.rs
1//! fusevm bytecode-VM bridge for ShellExecutor.
2//!
3//! **Extension** — has no Src/exec.c counterpart. C zsh uses a
4//! tree-walking interpreter (`Src/exec.c::execlist`). zshrs compiles
5//! the parsed AST to fusevm bytecode and runs it on a stack VM; this
6//! file holds the bridge between fusevm's `ShellHost` trait and our
7//! `ShellExecutor` state, the thread-local executor pointer, all
8//! `BUILTIN_*` opcode constants, and the giant `register_builtins`
9//! handler table that wires zsh builtins onto fusevm CallBuiltin
10//! opcodes.
11
12#![allow(unused_imports)]
13
14use crate::history::HistoryEngine;
15// MathState is private to math.rs (no public state struct in math.c).
16use crate::options::ZSH_OPTIONS_SET;
17// TcpSessions struct deleted — modules/tcp.rs uses ZTCP_SESSIONS thread_local.
18use crate::zftp::zftp_globals;
19// `Profiler` deleted — zprof state is module-level statics now.
20use crate::zutil::style_table;
21use compsys::cache::CompsysCache;
22use compsys::CompInitResult;
23use parking_lot::Mutex;
24use std::collections::{HashMap, HashSet};
25use std::env;
26use std::path::PathBuf;
27use std::sync::LazyLock;
28use indexmap::IndexMap;
29
30use crate::ported::exec::*;
31use crate::exec_jobs::JobState;
32use crate::intercepts::{AdviceKind, Intercept, intercept_matches};
33use std::io::Write;
34
35// ═══════════════════════════════════════════════════════════════════════════
36// Thread-local executor context for VM builtin dispatch
37// ═══════════════════════════════════════════════════════════════════════════
38
39use std::cell::{Cell, RefCell};
40use crate::socket::bin_zsocket;
41use fusevm::shell_builtins::*;
42use fusevm::Value;
43use crate::ported::zsh_h::{options, MAX_OPS};
44use std::io::BufRead;
45use crate::ported::zle::zle_thingy::getwidgettarget;
46use std::time::{SystemTime, UNIX_EPOCH};
47use std::cmp::Ordering;
48use std::fs;
49use std::os::unix::fs::PermissionsExt;
50use std::time::Instant;
51use std::os::unix::fs::MetadataExt;
52use std::os::unix::fs::FileTypeExt;
53use std::io::Write as _;
54use std::os::unix::io::AsRawFd;
55use std::ffi::CString;
56use std::io::Read;
57use std::os::unix::io::IntoRawFd;
58use fusevm::op::redirect_op as r;
59
thread_local! {
    /// Mirror of C zsh's `doneps4` local in execcmd_exec
    /// (Src/exec.c:2517+). Tracks whether PS4 has been emitted
    /// for the current xtrace line so a coalesced sequence of
    /// XTRACE_ASSIGN + XTRACE_ARGS produces ONE line:
    ///   `<PS4>a=1 b=2 echo 1 2\n`
    /// instead of three. Reset to false by XTRACE_ARGS /
    /// XTRACE_NEWLINE after emitting the trailing `\n`.
    ///
    /// Per-thread flag; its initial value on every thread is `false`.
    static XTRACE_DONE_PS4: Cell<bool> = const { Cell::new(false) };
}
70
// Thread-local pointer to the current ShellExecutor.
// Stored by `ExecutorContext::enter` before VM execution and reset to `None`
// when the guard that stored it is dropped. Builtin handlers read it through
// `with_executor`. `None` means no executor context is active on this thread.
thread_local! {
    static CURRENT_EXECUTOR: RefCell<Option<*mut ShellExecutor>> = const { RefCell::new(None) };
}
76
/// RAII guard that sets/clears the thread-local executor pointer.
///
/// Idempotent: calling `enter` when a context is already active is a no-op
/// for the entry side, and the guard's drop only clears the thread-local if
/// *this* call was the one that set it. Nested `execute_command` invocations
/// (e.g. from inside a builtin handler) reuse the outer pointer instead of
/// stomping it.
pub(crate) struct ExecutorContext {
    /// `true` only when the `enter` call that produced this guard found the
    /// thread-local slot empty and filled it; `Drop` clears the slot only in
    /// that case.
    we_set_it: bool,
}
87
88impl ExecutorContext {
89 pub(crate) fn enter(executor: &mut ShellExecutor) -> Self {
90 let we_set_it = CURRENT_EXECUTOR.with(|cell| {
91 let mut slot = cell.borrow_mut();
92 if slot.is_some() {
93 false
94 } else {
95 *slot = Some(executor as *mut ShellExecutor);
96 true
97 }
98 });
99 ExecutorContext { we_set_it }
100 }
101}
102
103impl Drop for ExecutorContext {
104 fn drop(&mut self) {
105 if self.we_set_it {
106 CURRENT_EXECUTOR.with(|cell| {
107 *cell.borrow_mut() = None;
108 });
109 }
110 }
111}
112
113
114/// Access the current executor from a builtin handler.
115/// # Safety
116/// Only call this from within a VM execution context (after ExecutorContext::enter).
117#[inline]
118pub(crate) fn with_executor<F, R>(f: F) -> R
119where
120 F: FnOnce(&mut ShellExecutor) -> R,
121{
122 CURRENT_EXECUTOR.with(|cell| {
123 let ptr = cell
124 .borrow()
125 .expect("with_executor called outside VM context");
126 // SAFETY: The pointer is valid for the duration of VM execution,
127 // and we're single-threaded within the executor.
128 let executor = unsafe { &mut *ptr };
129 f(executor)
130 })
131}
132
133// `try_with_executor` removed. The fallible variant was the bridge
134// canonical-side ports used to mirror writes into the legacy
135// exec.{variables,arrays,assoc_arrays,positional_params,
136// local_save_stack,var_attrs} caches. All such mirrors are now
137// dissolved: canonical setaparam / sethparam / setsparam write
138// paramtab as the single source of truth; fusevm reads consult
139// paramtab via exec.array() / exec.assoc() / exec.scalar() /
140// exec.pparams() / exec.param_flags() helpers.
141//
// The PM_LOCAL scope save lives in the BUILTIN_LOCAL dispatcher (via
// `with_executor`, the mandatory variant). The eval `execute_script`
// call lives in the BUILTIN_EVAL dispatcher. Lastval is read from the
// canonical LASTVAL atomic, which `exec.set_last_status` keeps current.
146
147
148
149
150/// Look up a canonical builtin by name in `BUILTINS` and dispatch
151/// via `execbuiltin` (Src/builtin.c:250). Mirrors the C pattern
152/// `bn = gethashnode2(builtintab, name); execbuiltin(args, redirs,
153/// bn)`. Returns 1 if no such builtin or if the handler is wired
154/// to None (legacy stub entry — the wrapper on ShellExecutor still
155/// covers those until their handler is wired into BUILTINS).
156pub(crate) fn dispatch_builtin(name: &str, args: Vec<String>) -> i32 {
157 let bn_idx = crate::ported::builtin::BUILTINS.iter()
158 .position(|b| b.node.nam == name);
159 if let Some(idx) = bn_idx {
160 let bn_static: &'static crate::ported::zsh_h::builtin =
161 &crate::ported::builtin::BUILTINS[idx];
162 let bn_ptr = bn_static as *const _ as *mut _;
163 crate::ported::builtin::execbuiltin(args, Vec::new(), bn_ptr)
164 } else {
165 1
166 }
167}
168
169
170/// Register all zsh builtins with the VM.
171pub(crate) fn register_builtins(vm: &mut fusevm::VM) {
172
173 // Macro for builtins that user functions are allowed to shadow.
174 // zsh dispatch order is alias → function → builtin; without the
175 // try_user_fn_override probe a `cat() { ... }; cat` would silently
176 // run the C builtin and ignore the user function.
177 macro_rules! reg_overridable {
178 ($vm:expr, $id:expr, $name:literal, $method:ident) => {
179 $vm.register_builtin($id, |vm, argc| {
180 let args = pop_args(vm, argc);
181 if let Some(s) = try_user_fn_override($name, &args) {
182 return Value::Status(s);
183 }
184 let status = with_executor(|exec| exec.$method(&args));
185 Value::Status(status)
186 });
187 };
188 }
189
190 // Core builtins
191 vm.register_builtin(BUILTIN_CD, |vm, argc| {
192 let args = pop_args(vm, argc);
193 if let Some(s) = try_user_fn_override("cd", &args) {
194 return Value::Status(s);
195 }
196 let status = dispatch_builtin("cd", args);
197 Value::Status(status)
198 });
199
200 vm.register_builtin(BUILTIN_PWD, |vm, argc| {
201 let args = pop_args(vm, argc);
202 if let Some(s) = try_user_fn_override("pwd", &args) {
203 return Value::Status(s);
204 }
205 let status = with_executor(|exec| exec.builtin_pwd_with_args(&args));
206 Value::Status(status)
207 });
208
209 vm.register_builtin(BUILTIN_ECHO, |vm, argc| {
210 let args = pop_args(vm, argc);
211 if let Some(s) = try_user_fn_override("echo", &args) {
212 return Value::Status(s);
213 }
214 // Update `$_` to the last arg before running. C zsh sets
215 // zunderscore in execcmd_exec for every simple command,
216 // including builtins.
217 crate::ported::params::set_zunderscore(&args);
218 let status = with_executor(|exec| exec.builtin_echo(&args, &[]));
219 Value::Status(status)
220 });
221
222 vm.register_builtin(BUILTIN_PRINT, |vm, argc| {
223 let args = pop_args(vm, argc);
224 if let Some(s) = try_user_fn_override("print", &args) {
225 return Value::Status(s);
226 }
227 crate::ported::params::set_zunderscore(&args);
228 let status = dispatch_builtin("print", args);
229 Value::Status(status)
230 });
231
232 vm.register_builtin(BUILTIN_PRINTF, |vm, argc| {
233 let args = pop_args(vm, argc);
234 if let Some(s) = try_user_fn_override("printf", &args) {
235 return Value::Status(s);
236 }
237 let status = with_executor(|exec| exec.builtin_printf(&args));
238 Value::Status(status)
239 });
240
241 vm.register_builtin(BUILTIN_EXPORT, |vm, argc| {
242 let args = pop_args(vm, argc);
243 let status = with_executor(|exec| exec.builtin_export(&args));
244 Value::Status(status)
245 });
246
247 vm.register_builtin(BUILTIN_UNSET, |vm, argc| {
248 let args = pop_args(vm, argc);
249 let status = dispatch_builtin("unset", args);
250 Value::Status(status)
251 });
252
253 vm.register_builtin(BUILTIN_SOURCE, |vm, argc| {
254 let args = pop_args(vm, argc);
255 let status = dispatch_builtin("dot", args);
256 Value::Status(status)
257 });
258
259 vm.register_builtin(BUILTIN_EXIT, |vm, argc| {
260 let args = pop_args(vm, argc);
261 let status = dispatch_builtin("exit", args);
262 Value::Status(status)
263 });
264
265 vm.register_builtin(BUILTIN_RETURN, |vm, argc| {
266 let args = pop_args(vm, argc);
267 // zsh: bare `return` (no arg) returns with the status of
268 // the most recently executed command — `false; return`
269 // returns 1, not 0. Direct port of zsh's bin_break/RETURN.
270 // The executor's `last_status` is stale here (synced at
271 // statement boundaries, not after each VM op), so read
272 // the live `vm.last_status` instead.
273 let live_status = vm.last_status;
274 let status = {
275 // Sync canonical LASTVAL to the VM's view BEFORE
276 // bin_break("return") reads it for the no-arg fallback.
277 with_executor(|exec| exec.set_last_status(live_status));
278 dispatch_builtin("return", args)
279 };
280 Value::Status(status)
281 });
282
283 vm.register_builtin(BUILTIN_TRUE, |vm, argc| {
284 let args = pop_args(vm, argc);
285 if let Some(s) = try_user_fn_override("true", &args) {
286 return Value::Status(s);
287 }
288 // `$_` for no-arg `true` is the command name itself ("true").
289 // pop_args only updates pending_underscore from args; for
290 // bare command name we backfill here.
291 if args.is_empty() {
292 with_executor(|exec| {
293 exec.pending_underscore = Some("true".to_string());
294 });
295 }
296 Value::Status(0)
297 });
298 vm.register_builtin(BUILTIN_FALSE, |vm, argc| {
299 let args = pop_args(vm, argc);
300 if let Some(s) = try_user_fn_override("false", &args) {
301 return Value::Status(s);
302 }
303 if args.is_empty() {
304 with_executor(|exec| {
305 exec.pending_underscore = Some("false".to_string());
306 });
307 }
308 Value::Status(1)
309 });
310 vm.register_builtin(BUILTIN_COLON, |vm, argc| {
311 let args = pop_args(vm, argc);
312 if args.is_empty() {
313 with_executor(|exec| {
314 exec.pending_underscore = Some(":".to_string());
315 });
316 }
317 Value::Status(0)
318 });
319
320 vm.register_builtin(BUILTIN_TEST, |vm, argc| {
321 let args = pop_args(vm, argc);
322 let status = dispatch_builtin("test", args);
323 Value::Status(status)
324 });
325
326 // Variable declaration
327 vm.register_builtin(BUILTIN_LOCAL, |vm, argc| {
328 let args = pop_args(vm, argc);
329 // Canonical bin_local handles the entire scope chain
330 // (`pm->old = oldpm` at Src/params.c:1137 inside createparam,
331 // `pm->level = locallevel` at Src/builtin.c:2576 inside
332 // typeset_single). The dispatcher only routes args.
333 let status = with_executor(|exec| exec.builtin_local(&args));
334 Value::Status(status)
335 });
336
337 vm.register_builtin(BUILTIN_TYPESET, |vm, argc| {
338 let args = pop_args(vm, argc);
339 // fusevm's builtin_id maps both `declare` and `typeset` to
340 // BUILTIN_TYPESET, so this handler must default to the
341 // typeset error-prefix. compile_zsh special-cases `declare`
342 // to register BUILTIN_DECLARE explicitly so that path keeps
343 // the `declare:` prefix in error messages.
344 let status = dispatch_builtin("typeset", args);
345 Value::Status(status)
346 });
347
348 vm.register_builtin(BUILTIN_DECLARE, |vm, argc| {
349 let args = pop_args(vm, argc);
350 let status = with_executor(|exec| exec.builtin_declare(&args));
351 Value::Status(status)
352 });
353
354 vm.register_builtin(BUILTIN_READONLY, |vm, argc| {
355 let args = pop_args(vm, argc);
356 let status = with_executor(|exec| exec.builtin_readonly(&args));
357 Value::Status(status)
358 });
359
360 vm.register_builtin(BUILTIN_INTEGER, |vm, argc| {
361 let args = pop_args(vm, argc);
362 let status = with_executor(|exec| exec.builtin_integer(&args));
363 Value::Status(status)
364 });
365
366 vm.register_builtin(BUILTIN_FLOAT, |vm, argc| {
367 let args = pop_args(vm, argc);
368 let status = with_executor(|exec| exec.builtin_float(&args));
369 Value::Status(status)
370 });
371
372 // I/O
373 vm.register_builtin(BUILTIN_READ, |vm, argc| {
374 let args = pop_args(vm, argc);
375 let status = dispatch_builtin("read", args);
376 Value::Status(status)
377 });
378
379 // Control flow
380 vm.register_builtin(BUILTIN_BREAK, |vm, argc| {
381 let args = pop_args(vm, argc);
382 let status = dispatch_builtin("break", args);
383 Value::Status(status)
384 });
385
386 vm.register_builtin(BUILTIN_CONTINUE, |vm, argc| {
387 let args = pop_args(vm, argc);
388 let status = dispatch_builtin("continue", args);
389 Value::Status(status)
390 });
391
392 vm.register_builtin(BUILTIN_SHIFT, |vm, argc| {
393 let args = pop_args(vm, argc);
394 let status = dispatch_builtin("shift", args);
395 Value::Status(status)
396 });
397
398 vm.register_builtin(BUILTIN_EVAL, |vm, argc| {
399 // Direct port of `bin_eval(UNUSED(char *nam), char **argv, UNUSED(Options ops), UNUSED(int func))` body from Src/builtin.c:6151:
400 // `if (!*argv) return 0;`
401 // `prog = parse_string(zjoin(argv, ' ', 1), 1);`
402 // `execode(prog, 1, 0, "eval");`
403 // The execode invocation lives here (not in the canonical
404 // free-fn) because it must run through the bytecode VM's
405 // current executor — the same VM that's mid-dispatch.
406 let args = pop_args(vm, argc);
407 if args.is_empty() {
408 return Value::Status(0); // c:6160
409 }
410 let src = args.join(" "); // c:6166
411 let status = with_executor(|exec| { // c:6175 execode
412 exec.execute_script(&src).unwrap_or(1)
413 });
414 Value::Status(status)
415 });
416
417 // BUILTIN_EXEC / BUILTIN_COMMAND / BUILTIN_BUILTIN wires deleted
418 // along with their handler stubs in src/exec.rs. The opcodes were
419 // never emitted by the fusevm compiler (zero `Op::CallBuiltin(...)`
420 // references) — leftover from the deleted `Src/exec.c` tree-walker
421 // port. When `command` / `exec` / `builtin` land as canonical
422 // ports in `src/ported/builtin.rs` (`Src/builtin.c:4017 bin_command`,
423 // `:6052 bin_exec`, etc.), wire them here through `execbuiltin`.
424
425 vm.register_builtin(BUILTIN_LET, |vm, argc| {
426 let args = pop_args(vm, argc);
427 let status = dispatch_builtin("let", args);
428 Value::Status(status)
429 });
430
431 // Job control
432 vm.register_builtin(BUILTIN_JOBS, |vm, argc| {
433 let args = pop_args(vm, argc);
434 let status = dispatch_builtin("jobs", args);
435 Value::Status(status)
436 });
437
438 vm.register_builtin(BUILTIN_FG, |vm, argc| {
439 let args = pop_args(vm, argc);
440 let status = dispatch_builtin("fg", args);
441 Value::Status(status)
442 });
443
444 vm.register_builtin(BUILTIN_BG, |vm, argc| {
445 let args = pop_args(vm, argc);
446 let status = dispatch_builtin("bg", args);
447 Value::Status(status)
448 });
449
450 vm.register_builtin(BUILTIN_KILL, |vm, argc| {
451 let args = pop_args(vm, argc);
452 let status = dispatch_builtin("kill", args);
453 Value::Status(status)
454 });
455
456 vm.register_builtin(BUILTIN_DISOWN, |vm, argc| {
457 let args = pop_args(vm, argc);
458 let status = dispatch_builtin("disown", args);
459 Value::Status(status)
460 });
461
462 vm.register_builtin(BUILTIN_WAIT, |vm, argc| {
463 let args = pop_args(vm, argc);
464 let status = dispatch_builtin("wait", args);
465 Value::Status(status)
466 });
467
468 vm.register_builtin(BUILTIN_SUSPEND, |vm, argc| {
469 let args = pop_args(vm, argc);
470 let status = dispatch_builtin("suspend", args);
471 Value::Status(status)
472 });
473
474 // History
475 // BUILTIN_HISTORY / BUILTIN_R wires deleted with their stubs.
476 // Opcodes never emitted by the fusevm compiler (dead since the
477 // tree-walker port was replaced). `bin_fc` stays — it's wired to
478 // the canonical port at `src/ported/builtin.rs`.
479 vm.register_builtin(BUILTIN_FC, |vm, argc| {
480 let args = pop_args(vm, argc);
481 let status = dispatch_builtin("fc", args);
482 Value::Status(status)
483 });
484
485 // Aliases
486 vm.register_builtin(BUILTIN_ALIAS, |vm, argc| {
487 let args = pop_args(vm, argc);
488 let status = dispatch_builtin("alias", args);
489 Value::Status(status)
490 });
491
492 // BUILTIN_UNALIAS wire deleted with its stub.
493
494 // Options
495 vm.register_builtin(BUILTIN_SET, |vm, argc| {
496 let args = pop_args(vm, argc);
497 let status = dispatch_builtin("set", args);
498 Value::Status(status)
499 });
500
501 vm.register_builtin(BUILTIN_SETOPT, |vm, argc| {
502 let args = pop_args(vm, argc);
503 // Canonical bin_setopt per options.c:580 — `isun` discriminant
504 // flips the action polarity; setopt → 0, unsetopt → 1.
505 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
506 argscount: 0, argsalloc: 0 };
507 let status = crate::ported::options::bin_setopt(
508 "setopt", &args, &ops, 0);
509 Value::Status(status)
510 });
511
512 vm.register_builtin(BUILTIN_UNSETOPT, |vm, argc| {
513 let args = pop_args(vm, argc);
514 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
515 argscount: 0, argsalloc: 0 };
516 let status = crate::ported::options::bin_setopt(
517 "unsetopt", &args, &ops, 1);
518 Value::Status(status)
519 });
520
521 vm.register_builtin(BUILTIN_SHOPT, |vm, argc| {
522 let args = pop_args(vm, argc);
523 let status = crate::extensions::ext_builtins::shopt(&args);
524 Value::Status(status)
525 });
526
527 vm.register_builtin(BUILTIN_EMULATE, |vm, argc| {
528 let args = pop_args(vm, argc);
529 let status = dispatch_builtin("emulate", args);
530 Value::Status(status)
531 });
532
533 vm.register_builtin(BUILTIN_GETOPTS, |vm, argc| {
534 let args = pop_args(vm, argc);
535 let status = dispatch_builtin("getopts", args);
536 Value::Status(status)
537 });
538
539 // BUILTIN_AUTOLOAD / BUILTIN_UNFUNCTION wires deleted with their
540 // stubs. `bin_functions` stays — wired to the canonical port.
541 vm.register_builtin(BUILTIN_FUNCTIONS, |vm, argc| {
542 let args = pop_args(vm, argc);
543 let status = dispatch_builtin("functions", args);
544 Value::Status(status)
545 });
546
547 // Traps
548 vm.register_builtin(BUILTIN_TRAP, |vm, argc| {
549 let args = pop_args(vm, argc);
550 let status = dispatch_builtin("trap", args);
551 Value::Status(status)
552 });
553
554 // BUILTIN_PUSHD / BUILTIN_POPD wires deleted with their stubs.
555 // `bin_dirs` stays — wired to the canonical port.
556 vm.register_builtin(BUILTIN_DIRS, |vm, argc| {
557 let args = pop_args(vm, argc);
558 let status = dispatch_builtin("dirs", args);
559 Value::Status(status)
560 });
561
562 // type / whence / where / which all route through `bin_whence`
563 // (canonical port at `src/ported/builtin.rs:3734` of
564 // `Src/builtin.c:3975`). Each gets its own opcode so funcid +
565 // defopts come from the BUILTINS table entry — execbuiltin
566 // applies them correctly via the module-level dispatch_builtin.
567 vm.register_builtin(BUILTIN_WHENCE, |vm, argc| {
568 let args = pop_args(vm, argc);
569 Value::Status(dispatch_builtin("whence", args))
570 });
571 vm.register_builtin(BUILTIN_TYPE, |vm, argc| {
572 let args = pop_args(vm, argc);
573 Value::Status(dispatch_builtin("type", args))
574 });
575 vm.register_builtin(BUILTIN_WHICH, |vm, argc| {
576 let args = pop_args(vm, argc);
577 Value::Status(dispatch_builtin("which", args))
578 });
579 vm.register_builtin(BUILTIN_WHERE, |vm, argc| {
580 let args = pop_args(vm, argc);
581 Value::Status(dispatch_builtin("where", args))
582 });
583
584 vm.register_builtin(BUILTIN_HASH, |vm, argc| {
585 let args = pop_args(vm, argc);
586 let status = dispatch_builtin("hash", args);
587 Value::Status(status)
588 });
589
590 vm.register_builtin(BUILTIN_REHASH, |vm, argc| {
591 let args = pop_args(vm, argc);
592 let status = dispatch_builtin("rehash", args);
593 Value::Status(status)
594 });
595
596 // `unhash`/`unalias`/`unfunction` share `bin_unhash` (Src/builtin.c:
597 // c:4350) but each carries its own funcid (BIN_UNHASH /
598 // BIN_UNALIAS / BIN_UNFUNCTION) in the BUILTINS table. Route each
599 // through `execbuiltin` so the correct funcid + optstr propagate
600 // — earlier wiring passed funcid=0 unconditionally and `unalias`
601 // silently no-op'd on the cmdnamtab path.
602 fn unhash_via_execbuiltin(name: &str, args: Vec<String>) -> i32 {
603 let bn_idx = crate::ported::builtin::BUILTINS.iter()
604 .position(|b| b.node.nam == name);
605 if let Some(idx) = bn_idx {
606 let bn_static: &'static crate::ported::zsh_h::builtin =
607 &crate::ported::builtin::BUILTINS[idx];
608 let bn_ptr = bn_static as *const _ as *mut _;
609 crate::ported::builtin::execbuiltin(args, Vec::new(), bn_ptr)
610 } else {
611 1
612 }
613 }
614 vm.register_builtin(BUILTIN_UNHASH, |vm, argc| {
615 let args = pop_args(vm, argc);
616 Value::Status(unhash_via_execbuiltin("unhash", args))
617 });
618 vm.register_builtin(BUILTIN_UNALIAS, |vm, argc| {
619 let args = pop_args(vm, argc);
620 Value::Status(unhash_via_execbuiltin("unalias", args))
621 });
622 vm.register_builtin(BUILTIN_UNFUNCTION, |vm, argc| {
623 let args = pop_args(vm, argc);
624 Value::Status(unhash_via_execbuiltin("unfunction", args))
625 });
626
627 // Completion
628 vm.register_builtin(BUILTIN_COMPGEN, |vm, argc| {
629 let args = pop_args(vm, argc);
630 let status = with_executor(|exec| exec.builtin_compgen(&args));
631 Value::Status(status)
632 });
633
634 vm.register_builtin(BUILTIN_COMPLETE, |vm, argc| {
635 let args = pop_args(vm, argc);
636 let status = with_executor(|exec| exec.builtin_complete(&args));
637 Value::Status(status)
638 });
639
640 vm.register_builtin(BUILTIN_COMPOPT, |vm, argc| {
641 let args = pop_args(vm, argc);
642 let status = with_executor(|exec| exec.builtin_compopt(&args));
643 Value::Status(status)
644 });
645
646 vm.register_builtin(BUILTIN_COMPADD, |vm, argc| {
647 let args = pop_args(vm, argc);
648 let status = dispatch_builtin("compadd", args);
649 Value::Status(status)
650 });
651
652 vm.register_builtin(BUILTIN_COMPSET, |vm, argc| {
653 let args = pop_args(vm, argc);
654 let status = dispatch_builtin("compset", args);
655 Value::Status(status)
656 });
657
658 vm.register_builtin(BUILTIN_COMPDEF, |vm, argc| {
659 let args = pop_args(vm, argc);
660 let status = with_executor(|exec| exec.builtin_compdef(&args));
661 Value::Status(status)
662 });
663
664 vm.register_builtin(BUILTIN_COMPINIT, |vm, argc| {
665 let args = pop_args(vm, argc);
666 let status = with_executor(|exec| exec.builtin_compinit(&args));
667 Value::Status(status)
668 });
669
670 vm.register_builtin(BUILTIN_CDREPLAY, |vm, argc| {
671 let args = pop_args(vm, argc);
672 let status = with_executor(|exec| exec.builtin_cdreplay(&args));
673 Value::Status(status)
674 });
675
676 // Zsh-specific
677 vm.register_builtin(BUILTIN_ZSTYLE, |vm, argc| {
678 let args = pop_args(vm, argc);
679 let status = dispatch_builtin("zstyle", args);
680 Value::Status(status)
681 });
682
683 vm.register_builtin(BUILTIN_ZMODLOAD, |vm, argc| {
684 let args = pop_args(vm, argc);
685 let status = dispatch_builtin("zmodload", args);
686 Value::Status(status)
687 });
688
689 vm.register_builtin(BUILTIN_BINDKEY, |vm, argc| {
690 let args = pop_args(vm, argc);
691 let status = dispatch_builtin("bindkey", args);
692 Value::Status(status)
693 });
694
695 vm.register_builtin(BUILTIN_ZLE, |vm, argc| {
696 let args = pop_args(vm, argc);
697 let status = dispatch_builtin("zle", args);
698 Value::Status(status)
699 });
700
701 vm.register_builtin(BUILTIN_VARED, |vm, argc| {
702 let args = pop_args(vm, argc);
703 let status = dispatch_builtin("vared", args);
704 Value::Status(status)
705 });
706
707 vm.register_builtin(BUILTIN_ZCOMPILE, |vm, argc| {
708 let args = pop_args(vm, argc);
709 let status = with_executor(|exec| exec.bin_zcompile(&args));
710 Value::Status(status)
711 });
712
713 vm.register_builtin(BUILTIN_ZFORMAT, |vm, argc| {
714 let args = pop_args(vm, argc);
715 let status = dispatch_builtin("zformat", args);
716 Value::Status(status)
717 });
718
719 vm.register_builtin(BUILTIN_ZPARSEOPTS, |vm, argc| {
720 let args = pop_args(vm, argc);
721 let status = dispatch_builtin("zparseopts", args);
722 Value::Status(status)
723 });
724
725 vm.register_builtin(BUILTIN_ZREGEXPARSE, |vm, argc| {
726 let args = pop_args(vm, argc);
727 let status = dispatch_builtin("zregexparse", args);
728 Value::Status(status)
729 });
730
731 // Resource limits
732 vm.register_builtin(BUILTIN_ULIMIT, |vm, argc| {
733 let args = pop_args(vm, argc);
734 let status = with_executor(|exec| exec.bin_ulimit(&args));
735 Value::Status(status)
736 });
737
738 vm.register_builtin(BUILTIN_LIMIT, |vm, argc| {
739 let args = pop_args(vm, argc);
740 let status = with_executor(|exec| exec.bin_limit(&args));
741 Value::Status(status)
742 });
743
744 vm.register_builtin(BUILTIN_UNLIMIT, |vm, argc| {
745 let args = pop_args(vm, argc);
746 let status = with_executor(|exec| exec.bin_unlimit(&args));
747 Value::Status(status)
748 });
749
750 vm.register_builtin(BUILTIN_UMASK, |vm, argc| {
751 let args = pop_args(vm, argc);
752 let status = dispatch_builtin("umask", args);
753 Value::Status(status)
754 });
755
756 // Misc
757 vm.register_builtin(BUILTIN_TIMES, |vm, argc| {
758 let args = pop_args(vm, argc);
759 let status = dispatch_builtin("times", args);
760 Value::Status(status)
761 });
762
763 vm.register_builtin(BUILTIN_CALLER, |vm, argc| {
764 let args = pop_args(vm, argc);
765 let status = with_executor(|exec| exec.builtin_caller(&args));
766 Value::Status(status)
767 });
768
769 vm.register_builtin(BUILTIN_HELP, |vm, argc| {
770 let args = pop_args(vm, argc);
771 let status = with_executor(|exec| exec.builtin_help(&args));
772 Value::Status(status)
773 });
774
775 vm.register_builtin(BUILTIN_ENABLE, |vm, argc| {
776 let args = pop_args(vm, argc);
777 let status = dispatch_builtin("enable", args);
778 Value::Status(status)
779 });
780
781 vm.register_builtin(BUILTIN_DISABLE, |vm, argc| {
782 let args = pop_args(vm, argc);
783 let status = dispatch_builtin("disable", args);
784 Value::Status(status)
785 });
786
787 // BUILTIN_NOGLOB wire deleted with its stub.
788
789 vm.register_builtin(BUILTIN_TTYCTL, |vm, argc| {
790 let args = pop_args(vm, argc);
791 let status = dispatch_builtin("ttyctl", args);
792 Value::Status(status)
793 });
794
795 vm.register_builtin(BUILTIN_SYNC, |vm, argc| {
796 let args = pop_args(vm, argc);
797 // Canonical bin_sync per files.c:53 — `sync(); return 0;`.
798 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
799 argscount: 0, argsalloc: 0 };
800 let status = crate::ported::modules::files::bin_sync(
801 "sync", &args, &ops, 0);
802 Value::Status(status)
803 });
804
805 vm.register_builtin(BUILTIN_MKDIR, |vm, argc| {
806 let args = pop_args(vm, argc);
807 // Canonical bin_mkdir wired in BUILTINS table (files.c:63).
808 // execbuiltin handles the "pm:" optstr parsing.
809 Value::Status(dispatch_builtin("mkdir", args))
810 });
811
812 vm.register_builtin(BUILTIN_STRFTIME, |vm, argc| {
813 let args = pop_args(vm, argc);
814 // Canonical bin_strftime takes (nam, argv, ops, func) per
815 // Src/Modules/datetime.c:187. Adapt &[String] → &[&str] +
816 // empty options inline (datetime parses no flags).
817 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
818 argscount: 0, argsalloc: 0 };
819 let argv: Vec<&str> = args.iter().map(String::as_str).collect();
820 let status = crate::ported::modules::datetime::bin_strftime(
821 "strftime", &argv, &ops, 0);
822 Value::Status(status)
823 });
824
825 vm.register_builtin(BUILTIN_ZSLEEP, |vm, argc| {
826 let args = pop_args(vm, argc);
827 let status = crate::extensions::ext_builtins::zsleep(&args);
828 Value::Status(status)
829 });
830
831 vm.register_builtin(BUILTIN_ZSYSTEM, |vm, argc| {
832 let args = pop_args(vm, argc);
833 // bin_zsystem now takes the canonical C signature
834 // (name, args, ops, func) per Src/Modules/system.c:806.
835 let ops = crate::ported::zsh_h::options {
836 ind: [0u8; crate::ported::zsh_h::MAX_OPS],
837 args: Vec::new(), argscount: 0, argsalloc: 0,
838 };
839 let _ = with_executor(|_exec| ());
840 let status = crate::modules::system::bin_zsystem("zsystem", &args, &ops, 0);
841 Value::Status(status)
842 });
843
844 // PCRE
845 vm.register_builtin(BUILTIN_PCRE_COMPILE, |vm, argc| {
846 let args = pop_args(vm, argc);
847 let status = dispatch_builtin("pcre_compile", args);
848 Value::Status(status)
849 });
850
851 vm.register_builtin(BUILTIN_PCRE_MATCH, |vm, argc| {
852 let args = pop_args(vm, argc);
853 let status = dispatch_builtin("pcre_match", args);
854 Value::Status(status)
855 });
856
857 vm.register_builtin(BUILTIN_PCRE_STUDY, |vm, argc| {
858 let args = pop_args(vm, argc);
859 let status = dispatch_builtin("pcre_study", args);
860 Value::Status(status)
861 });
862
863 // Database (GDBM)
864 vm.register_builtin(BUILTIN_ZTIE, |vm, argc| {
865 let args = pop_args(vm, argc);
866 let status = dispatch_builtin("ztie", args);
867 Value::Status(status)
868 });
869
870 vm.register_builtin(BUILTIN_ZUNTIE, |vm, argc| {
871 let args = pop_args(vm, argc);
872 let status = dispatch_builtin("zuntie", args);
873 Value::Status(status)
874 });
875
876 vm.register_builtin(BUILTIN_ZGDBMPATH, |vm, argc| {
877 let args = pop_args(vm, argc);
878 let status = dispatch_builtin("zgdbmpath", args);
879 Value::Status(status)
880 });
881
882 // Prompt
883 vm.register_builtin(BUILTIN_PROMPTINIT, |vm, argc| {
884 let args = pop_args(vm, argc);
885 Value::Status(crate::extensions::ext_builtins::promptinit(&args))
886 });
887
888 vm.register_builtin(BUILTIN_PROMPT, |vm, argc| {
889 let args = pop_args(vm, argc);
890 Value::Status(crate::extensions::ext_builtins::prompt(&args))
891 });
892
893 // Async / Parallel (zshrs extensions)
894 vm.register_builtin(BUILTIN_ASYNC, |vm, argc| {
895 let args = pop_args(vm, argc);
896 let status = with_executor(|exec| exec.builtin_async(&args));
897 Value::Status(status)
898 });
899
900 vm.register_builtin(BUILTIN_AWAIT, |vm, argc| {
901 let args = pop_args(vm, argc);
902 let status = with_executor(|exec| exec.builtin_await(&args));
903 Value::Status(status)
904 });
905
906 vm.register_builtin(BUILTIN_PMAP, |vm, argc| {
907 let args = pop_args(vm, argc);
908 let status = with_executor(|exec| exec.builtin_pmap(&args));
909 Value::Status(status)
910 });
911
912 vm.register_builtin(BUILTIN_PGREP, |vm, argc| {
913 let args = pop_args(vm, argc);
914 let status = with_executor(|exec| exec.builtin_pgrep(&args));
915 Value::Status(status)
916 });
917
918 vm.register_builtin(BUILTIN_PEACH, |vm, argc| {
919 let args = pop_args(vm, argc);
920 let status = with_executor(|exec| exec.builtin_peach(&args));
921 Value::Status(status)
922 });
923
924 vm.register_builtin(BUILTIN_BARRIER, |vm, argc| {
925 let args = pop_args(vm, argc);
926 let status = with_executor(|exec| exec.builtin_barrier(&args));
927 Value::Status(status)
928 });
929
930 // Intercept (AOP)
931 vm.register_builtin(BUILTIN_INTERCEPT, |vm, argc| {
932 let args = pop_args(vm, argc);
933 let status = with_executor(|exec| exec.builtin_intercept(&args));
934 Value::Status(status)
935 });
936
937 vm.register_builtin(BUILTIN_INTERCEPT_PROCEED, |vm, argc| {
938 let args = pop_args(vm, argc);
939 let status = with_executor(|exec| exec.builtin_intercept_proceed(&args));
940 Value::Status(status)
941 });
942
943 // Debug / Profile
944 vm.register_builtin(BUILTIN_DOCTOR, |vm, argc| {
945 let args = pop_args(vm, argc);
946 let status = with_executor(|exec| exec.builtin_doctor(&args));
947 Value::Status(status)
948 });
949
950 vm.register_builtin(BUILTIN_DBVIEW, |vm, argc| {
951 let args = pop_args(vm, argc);
952 let status = with_executor(|exec| exec.builtin_dbview(&args));
953 Value::Status(status)
954 });
955
956 vm.register_builtin(BUILTIN_PROFILE, |vm, argc| {
957 let args = pop_args(vm, argc);
958 let status = with_executor(|exec| exec.builtin_profile(&args));
959 Value::Status(status)
960 });
961
962 vm.register_builtin(BUILTIN_ZPROF, |vm, argc| {
963 let args = pop_args(vm, argc);
964 // bin_zprof now takes the canonical C signature
965 // (name, args, ops, func) per Src/Modules/zprof.c:139.
966 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
967 argscount: 0, argsalloc: 0 };
968 if args.iter().any(|a| a == "-c") { ops.ind[b'c' as usize] = 1; }
969 let _ = with_executor(|_exec| ());
970 let status = crate::modules::zprof::bin_zprof("zprof", &args, &ops, 0);
971 Value::Status(status)
972 });
973
974 // ═══════════════════════════════════════════════════════════════════════
975 // Coreutils builtins (anti-fork, gated by !posix_mode)
976 //
977 // All of these are routinely wrapped by user functions in real
978 // dotfiles (zpwr, oh-my-zsh, etc.) — `cat() { ... }`, `ls() { ... }`,
979 // `find() { ... }`. Each handler MUST consult try_user_fn_override
980 // first (via reg_overridable!) so the user definition wins, matching
981 // zsh's alias → function → builtin dispatch order.
982 // ═══════════════════════════════════════════════════════════════════════
983
984 reg_overridable!(vm, BUILTIN_CAT, "cat", builtin_cat);
985 reg_overridable!(vm, BUILTIN_HEAD, "head", builtin_head);
986 reg_overridable!(vm, BUILTIN_TAIL, "tail", builtin_tail);
987 reg_overridable!(vm, BUILTIN_WC, "wc", builtin_wc);
988 reg_overridable!(vm, BUILTIN_BASENAME, "basename", builtin_basename);
989 reg_overridable!(vm, BUILTIN_DIRNAME, "dirname", builtin_dirname);
990 reg_overridable!(vm, BUILTIN_TOUCH, "touch", builtin_touch);
991 reg_overridable!(vm, BUILTIN_REALPATH, "realpath", builtin_realpath);
992 reg_overridable!(vm, BUILTIN_SORT, "sort", builtin_sort);
993 reg_overridable!(vm, BUILTIN_FIND, "find", builtin_find);
994 reg_overridable!(vm, BUILTIN_UNIQ, "uniq", builtin_uniq);
995 reg_overridable!(vm, BUILTIN_CUT, "cut", builtin_cut);
996 reg_overridable!(vm, BUILTIN_TR, "tr", builtin_tr);
997 reg_overridable!(vm, BUILTIN_SEQ, "seq", builtin_seq);
998 reg_overridable!(vm, BUILTIN_REV, "rev", builtin_rev);
999 reg_overridable!(vm, BUILTIN_TEE, "tee", builtin_tee);
1000 reg_overridable!(vm, BUILTIN_SLEEP, "sleep", builtin_sleep);
1001 reg_overridable!(vm, BUILTIN_WHOAMI, "whoami", builtin_whoami);
1002 reg_overridable!(vm, BUILTIN_ID, "id", builtin_id);
1003
1004 reg_overridable!(vm, BUILTIN_HOSTNAME, "hostname", builtin_hostname);
1005 reg_overridable!(vm, BUILTIN_UNAME, "uname", builtin_uname);
1006 reg_overridable!(vm, BUILTIN_DATE, "date", builtin_date);
1007 reg_overridable!(vm, BUILTIN_MKTEMP, "mktemp", builtin_mktemp);
1008
1009 // BUILTIN_EXPAND_WORD_RUNTIME (id 281) was a legacy JSON round-trip
1010 // bridge that no chunk emits anymore. The constant + handler are
1011 // removed; the ID stays reserved in the gap before
1012 // BUILTIN_REGISTER_FUNCTION so future remaps don't reuse it.
1013
// Pipeline execution — bytecode-native. Pops N sub-chunk indices and
// creates N-1 pipes. The first N-1 stages are each forked into a child
// that runs the stage's compiled bytecode and exits; the last stage runs
// in the current process with stdin wired to the final pipe. The parent
// then waits for the children and returns the pipeline status (the last
// stage's status unless `pipefail` is set).
1018 //
1019 // Caveats: post-fork in a multi-threaded program, only async-signal-safe
1020 // ops are POSIX-safe. We violate this (running the bytecode VM after fork
1021 // touches mutexes like REGEX_CACHE). In practice, most pipeline stages
1022 // don't touch shared mutex state — externals fork/exec away, builtins do
1023 // pure I/O. Risks are bounded; if a stage does touch a held mutex, the
1024 // child deadlocks.
1025 vm.register_builtin(BUILTIN_RUN_PIPELINE, |vm, argc| {
1026 let n = argc as usize;
1027 if n == 0 {
1028 return Value::Status(0);
1029 }
1030
1031 // Pop N sub-chunk indices (LIFO → reverse to stage order)
1032 let mut indices: Vec<u16> = Vec::with_capacity(n);
1033 for _ in 0..n {
1034 indices.push(vm.pop().to_int() as u16);
1035 }
1036 indices.reverse();
1037
1038 // Clone each stage's sub-chunk
1039 let stages: Vec<fusevm::Chunk> = indices
1040 .iter()
1041 .filter_map(|&i| vm.chunk.sub_chunks.get(i as usize).cloned())
1042 .collect();
1043 if stages.len() != n {
1044 return Value::Status(1);
1045 }
1046
1047 // Single stage — no pipe, just run inline
1048 if n == 1 {
1049 let mut stage_vm = fusevm::VM::new(stages.into_iter().next().unwrap());
1050 register_builtins(&mut stage_vm);
1051 let _ = stage_vm.run();
1052 return Value::Status(stage_vm.last_status);
1053 }
1054
1055 // Build N-1 pipes
1056 let mut pipes: Vec<(i32, i32)> = Vec::with_capacity(n - 1);
1057 for _ in 0..n - 1 {
1058 let mut fds = [0i32; 2];
1059 if unsafe { libc::pipe(fds.as_mut_ptr()) } < 0 {
1060 // Cleanup any pipes we already created
1061 for (r, w) in &pipes {
1062 unsafe {
1063 libc::close(*r);
1064 libc::close(*w);
1065 }
1066 }
1067 return Value::Status(1);
1068 }
1069 pipes.push((fds[0], fds[1]));
1070 }
1071
1072 // zsh runs the LAST stage of a pipeline in the CURRENT shell
1073 // (not a forked child) so a trailing `read x` keeps its
1074 // assignment in the parent. Other shells (bash) fork every
1075 // stage. Honor zsh by leaving stage N-1 inline. Forks the
1076 // first N-1 stages with fork(); runs the last in this process
1077 // with stdin dup2'd to the last pipe's read end and stdout
1078 // restored after.
1079 let last_idx = n - 1;
1080 let stages_vec: Vec<fusevm::Chunk> = stages.into_iter().collect();
1081
1082 let mut child_pids: Vec<libc::pid_t> = Vec::with_capacity(n - 1);
1083 for (i, chunk) in stages_vec.iter().take(last_idx).enumerate() {
1084 match unsafe { libc::fork() } {
1085 -1 => {
1086 // fork failed — kill any children we already started
1087 for pid in &child_pids {
1088 unsafe { libc::kill(*pid, libc::SIGTERM) };
1089 }
1090 for (r, w) in &pipes {
1091 unsafe {
1092 libc::close(*r);
1093 libc::close(*w);
1094 }
1095 }
1096 return Value::Status(1);
1097 }
1098 0 => {
1099 // Reset SIGPIPE to default so a broken-pipe write
1100 // kills the child cleanly instead of triggering a
1101 // Rust println! panic. The parent shell ignores
1102 // SIGPIPE so it can handle EPIPE itself, but child
1103 // pipeline stages should die quietly when their
1104 // downstream stage closes early (e.g. `seq | head -3`).
1105 unsafe {
1106 libc::signal(libc::SIGPIPE, libc::SIG_DFL);
1107 }
1108 // Child: wire stdin from previous pipe's read end
1109 if i > 0 {
1110 unsafe {
1111 libc::dup2(pipes[i - 1].0, libc::STDIN_FILENO);
1112 }
1113 }
1114 // Wire stdout to next pipe's write end
1115 unsafe {
1116 libc::dup2(pipes[i].1, libc::STDOUT_FILENO);
1117 }
1118 // Close all original pipe fds (keeping stdin/stdout dups)
1119 for (r, w) in &pipes {
1120 unsafe {
1121 libc::close(*r);
1122 libc::close(*w);
1123 }
1124 }
1125
1126 // Run this stage's bytecode on a fresh VM
1127 let mut stage_vm = fusevm::VM::new(chunk.clone());
1128 register_builtins(&mut stage_vm);
1129 let _ = stage_vm.run();
1130 // Flush any buffered output before exiting
1131 let _ = std::io::stdout().flush();
1132 let _ = std::io::stderr().flush();
1133 std::process::exit(stage_vm.last_status);
1134 }
1135 pid => {
1136 child_pids.push(pid);
1137 }
1138 }
1139 }
1140
1141 // Parent runs the LAST stage inline. Save stdin, dup the last
1142 // pipe's read end onto fd 0, run the chunk, restore stdin.
1143 // Close every other pipe fd so the producer side gets EOF
1144 // when the last upstream stage exits.
1145 let saved_stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1146 if last_idx > 0 {
1147 let read_fd = pipes[last_idx - 1].0;
1148 unsafe {
1149 libc::dup2(read_fd, libc::STDIN_FILENO);
1150 }
1151 }
1152 // Close all pipe fds in the parent now that stdin is wired.
1153 // (Children already have their own copies. The dup2 above
1154 // already gave us a fresh fd 0 if needed.)
1155 for (r, w) in &pipes {
1156 unsafe {
1157 libc::close(*r);
1158 libc::close(*w);
1159 }
1160 }
1161
1162 // Run the last stage's bytecode on a sub-VM with the host
1163 // wired up. The host points back at the executor so reads
1164 // (`read x`) update the parent's variables directly.
1165 let last_stage_status = {
1166 let last_chunk = stages_vec.into_iter().last().unwrap();
1167 let mut stage_vm = fusevm::VM::new(last_chunk);
1168 register_builtins(&mut stage_vm);
1169 stage_vm.set_shell_host(Box::new(ZshrsHost));
1170 let _ = stage_vm.run();
1171 let _ = std::io::stdout().flush();
1172 let _ = std::io::stderr().flush();
1173 stage_vm.last_status
1174 };
1175
1176 // Restore stdin
1177 if saved_stdin >= 0 {
1178 unsafe {
1179 libc::dup2(saved_stdin, libc::STDIN_FILENO);
1180 libc::close(saved_stdin);
1181 }
1182 }
1183
1184 // Wait for all forked stages, capture per-stage statuses for PIPESTATUS.
1185 let mut pipestatus: Vec<i32> = Vec::with_capacity(n);
1186 for pid in child_pids {
1187 let mut status: i32 = 0;
1188 unsafe {
1189 libc::waitpid(pid, &mut status, 0);
1190 }
1191 let s = if libc::WIFEXITED(status) {
1192 libc::WEXITSTATUS(status)
1193 } else if libc::WIFSIGNALED(status) {
1194 128 + libc::WTERMSIG(status)
1195 } else {
1196 1
1197 };
1198 pipestatus.push(s);
1199 }
1200 // Append the in-parent last-stage status so `pipestatus` ends
1201 // with N entries (one per stage).
1202 pipestatus.push(last_stage_status);
1203 // Pipeline exit status: by default, the LAST stage's status.
1204 // With `setopt pipefail` (or `set -o pipefail`), use the
1205 // first non-zero stage status (so failures earlier in the
1206 // pipeline propagate even if the last stage succeeded).
1207 let pipefail_on =
1208 with_executor(|exec| crate::ported::options::opt_state_get("pipefail").unwrap_or(false));
1209 let last_status = if pipefail_on {
1210 pipestatus
1211 .iter()
1212 .copied()
1213 .rfind(|&s| s != 0)
1214 .or_else(|| pipestatus.last().copied())
1215 .unwrap_or(0)
1216 } else {
1217 *pipestatus.last().unwrap_or(&0)
1218 };
1219
1220 // Populate `pipestatus` (zsh) and `PIPESTATUS` (bash) arrays so
1221 // scripts can inspect per-stage exit codes. Both names are common
1222 // in user code; populating both removes a portability foot-gun.
1223 with_executor(|exec| {
1224 let strs: Vec<String> = pipestatus.iter().map(|s| s.to_string()).collect();
1225 exec.set_array("pipestatus".to_string(), strs.clone());
1226 exec.set_array("PIPESTATUS".to_string(), strs);
1227 });
1228
1229 Value::Status(last_status)
1230 });
1231
1232 // Array→String join. Pops one value; if it's an Array (e.g. from Op::Glob),
1233 // joins string-coerced elements with a single space. Pass-through for
1234 // non-arrays so the op is safe to chain after any String-or-Array producer.
1235 vm.register_builtin(BUILTIN_ARRAY_JOIN, |vm, _argc| {
1236 let val = vm.pop();
1237 match val {
1238 fusevm::Value::Array(items) => {
1239 let parts: Vec<String> = items.iter().map(|v| v.to_str()).collect();
1240 fusevm::Value::str(parts.join(" "))
1241 }
1242 other => other,
1243 }
1244 });
1245
1246 // `cmd &` background execution. Compile_list emits this for any item
1247 // followed by ListOp::Amp: the cmd is compiled into a sub-chunk, its index
1248 // pushed, then this builtin pops the index, looks up the chunk, forks. The
1249 // child detaches via setsid (so SIGINT to the foreground job doesn't kill
1250 // it), runs the bytecode on a fresh VM with builtins re-registered, exits
1251 // with the last status. The parent returns Status(0) immediately. Job
1252 // tracking via JobTable is deferred to Phase G6 — JobTable::add_job
1253 // currently requires a std::process::Child, which a libc::fork doesn't
1254 // produce. Until then, `jobs`/`fg`/`wait` can't see these pids.
1255 //WARNING FAKE AND MUST BE DELETED
1256 vm.register_builtin(BUILTIN_RUN_BG, |vm, _argc| {
1257 let sub_idx = vm.pop().to_int() as usize;
1258 let chunk = match vm.chunk.sub_chunks.get(sub_idx).cloned() {
1259 Some(c) => c,
1260 None => return Value::Status(1),
1261 };
1262
1263 match unsafe { libc::fork() } {
1264 -1 => Value::Status(1),
1265 0 => {
1266 // Child: detach and run.
1267 unsafe { libc::setsid() };
1268 let mut bg_vm = fusevm::VM::new(chunk);
1269 register_builtins(&mut bg_vm);
1270 let _ = bg_vm.run();
1271 let _ = std::io::stdout().flush();
1272 let _ = std::io::stderr().flush();
1273 std::process::exit(bg_vm.last_status);
1274 }
1275 pid => {
1276 // Parent: record the PID into `$!` (most recent
1277 // backgrounded job's pid). zsh exposes this for any
1278 // script that needs `wait $!`. Also register the
1279 // bare-pid job so a no-args `wait` can synchronize.
1280 with_executor(|exec| {
1281 exec.set_scalar("!".to_string(), pid.to_string());
1282 exec.jobs.add_pid_job(
1283 pid,
1284 String::new(),
1285 crate::exec_jobs::JobState::Running,
1286 );
1287 });
1288 Value::Status(0)
1289 }
1290 }
1291 });
1292
1293 // ── Indexed-array storage and access ──────────────────────────────────
1294 //
1295 // Two calling conventions:
1296 // 1. `arr=(a b c)` → push "a", "b", "c", "arr"; CallBuiltin(SET_ARRAY, 4).
1297 // 2. `arr=($(cmd))` → push FlatArray, "arr"; CallBuiltin(SET_ARRAY, 2)
1298 // where FlatArray is a Value::Array of words after BUILTIN_ARRAY_FLATTEN
1299 // + WORD_SPLIT processing.
1300 // Both end with name as the LAST arg. Values may be a single Value::Array
1301 // (in which case we extract its elements) or a sequence of strings.
1302 //WARNING FAKE AND MUST BE DELETED
1303 vm.register_builtin(BUILTIN_SET_ARRAY, |vm, argc| {
1304 let n = argc as usize;
1305 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(n);
1306 for _ in 0..n {
1307 popped.push(vm.pop());
1308 }
1309 popped.reverse();
1310 if popped.is_empty() {
1311 return Value::Status(1);
1312 }
1313 let name = popped.pop().unwrap().to_str();
1314 let mut values: Vec<String> = Vec::new();
1315 for v in popped {
1316 match v {
1317 fusevm::Value::Array(items) => {
1318 for it in items {
1319 values.push(it.to_str());
1320 }
1321 }
1322 other => values.push(other.to_str()),
1323 }
1324 }
1325 let blocked = with_executor(|exec| {
1326 // Refuse to mutate read-only arrays (declare -ra / typeset
1327 // -ra). zsh prints `read-only variable: NAME` and exits 1
1328 // in -c mode. Mirror that fatal behavior.
1329 let is_ro = exec.is_readonly_param(&name);
1330 if is_ro {
1331 eprintln!("zshrs:1: read-only variable: {}", name);
1332 std::process::exit(1);
1333 }
1334 // Two-statement assoc init: `typeset -A m; m=(k v k v ...)`.
1335 if exec.assoc(&name).is_some() {
1336 // zsh: odd number of values -> `bad set of key/value
1337 // pairs for associative array` exit 1, no
1338 // assignment. zshrs's `if let Some(v) = it.next()`
1339 // silently dropped the orphaned key.
1340 if !values.len().is_multiple_of(2) {
1341 eprintln!("zshrs:1: bad set of key/value pairs for associative array");
1342 return true;
1343 }
1344 let mut map: IndexMap<String, String> = IndexMap::new();
1345 let mut it = values.clone().into_iter();
1346 while let Some(k) = it.next() {
1347 if let Some(v) = it.next() {
1348 map.insert(k, v);
1349 }
1350 }
1351 exec.set_assoc(name.clone(), map);
1352 // PFA-SMR aspect: assoc bulk init `h=(k1 v1 k2 v2 ...)`.
1353 // Recorder emits a structured assoc event with the
1354 // ordered (key, value) pairs preserved in
1355 // `value_assoc` so replay can reconstruct the assoc
1356 // exactly — insertion order matters because zsh
1357 // associative arrays are insertion-ordered (via
1358 // IndexMap on the executor side).
1359 #[cfg(feature = "recorder")]
1360 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
1361 let ctx = exec.recorder_ctx();
1362 let attrs = exec.recorder_attrs_for(&name);
1363 let mut pairs: Vec<(String, String)> = Vec::with_capacity(values.len() / 2);
1364 let mut iter = values.iter().cloned();
1365 while let Some(k) = iter.next() {
1366 if let Some(v) = iter.next() {
1367 pairs.push((k, v));
1368 }
1369 }
1370 crate::recorder::emit_assoc_assign(&name, pairs, attrs, false, ctx);
1371 }
1372 return false;
1373 }
1374 // Mirror array→scalar if name is the array side of a typeset -T tie.
1375 // `typeset -U arr` dedupes; first-wins per zsh.
1376 let is_unique = (exec.param_flags(&name) as u32 & crate::ported::zsh_h::PM_UNIQUE) != 0;
1377 if is_unique {
1378 let mut seen = std::collections::HashSet::new();
1379 values.retain(|v| seen.insert(v.clone()));
1380 }
1381 if let Some((scalar_name, sep)) = exec.tied_array_to_scalar.get(&name).cloned() {
1382 let joined = values.join(&sep);
1383 exec.set_scalar(scalar_name, joined);
1384 exec.set_array(name.clone(), values.clone());
1385 } else {
1386 exec.set_array(name.clone(), values.clone());
1387 }
1388 // PFA-SMR aspect: array SET (`name=(...)`). emit_path_or_assign
1389 // routes path-family names to per-element path_mod events
1390 // and everything else to one structured array `assign`
1391 // event with value_array = ordered elements (replay-safe).
1392 #[cfg(feature = "recorder")]
1393 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
1394 let ctx = exec.recorder_ctx();
1395 let attrs = exec.recorder_attrs_for(&name);
1396 emit_path_or_assign(&name, &values, attrs, false, &ctx);
1397 }
1398 false
1399 });
1400 Value::Status(if blocked { 1 } else { 0 })
1401 });
1402 // `arr+=(d e f)` — append. Same calling conventions as SET_ARRAY.
1403 //WARNING FAKE AND MUST BE DELETED
1404 vm.register_builtin(BUILTIN_APPEND_ARRAY, |vm, argc| {
1405 let n = argc as usize;
1406 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(n);
1407 for _ in 0..n {
1408 popped.push(vm.pop());
1409 }
1410 popped.reverse();
1411 if popped.is_empty() {
1412 return Value::Status(1);
1413 }
1414 let name = popped.pop().unwrap().to_str();
1415 let mut values: Vec<String> = Vec::new();
1416 for v in popped {
1417 match v {
1418 fusevm::Value::Array(items) => {
1419 for it in items {
1420 values.push(it.to_str());
1421 }
1422 }
1423 other => values.push(other.to_str()),
1424 }
1425 }
1426 with_executor(|exec| {
1427 // Refuse appends on read-only arrays (declare -ra).
1428 let is_ro = exec.is_readonly_param(&name);
1429 if is_ro {
1430 eprintln!("zshrs:1: read-only variable: {}", name);
1431 std::process::exit(1);
1432 }
1433 // Assoc-aware append: `typeset -A m; m+=(k1 v1 k2 v2 ...)`
1434 // adds key/value pairs. Without this, the values were
1435 // appended to a parallel array and `${m[k]}` lookup missed
1436 // the new keys entirely.
1437 if exec.assoc(&name).is_some() {
1438 let mut map = exec.assoc(&name).unwrap_or_default();
1439 let mut it = values.into_iter();
1440 while let Some(k) = it.next() {
1441 if let Some(v) = it.next() {
1442 map.insert(k, v);
1443 }
1444 }
1445 exec.set_assoc(name, map);
1446 return;
1447 }
1448 // `typeset -U arr` dedupes — append must respect existing
1449 // elements too. Skip values that are already present.
1450 // PFA-SMR aspect: array APPEND (`name+=(...)`). Same
1451 // routing as SET_ARRAY but with is_append=true so the
1452 // event carries the APPEND attr bit for replay.
1453 #[cfg(feature = "recorder")]
1454 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
1455 let ctx = exec.recorder_ctx();
1456 let attrs = exec.recorder_attrs_for(&name);
1457 emit_path_or_assign(&name, &values, attrs, true, &ctx);
1458 }
1459 let is_unique = (exec.param_flags(&name) as u32 & crate::ported::zsh_h::PM_UNIQUE) != 0;
1460 // Mirror the post-append result back to a tied scalar
1461 // (`typeset -T PATH path :` — `path+=(/x)` must update
1462 // `PATH` too). Without this, zinit / OMZ patterns like
1463 // `path+=(/some/dir)` left $PATH stale, so `command -v`
1464 // / pathprog lookups missed newly-added dirs.
1465 let tied_scalar = exec.tied_array_to_scalar.get(&name).cloned();
1466 // Read current via canonical exec.array (paramtab-first),
1467 // mutate, then write back via set_array which writes both
1468 // paramtab and the legacy cache.
1469 let mut target = exec.array(&name).unwrap_or_default();
1470 if is_unique {
1471 let existing: std::collections::HashSet<String> = target.iter().cloned().collect();
1472 for v in values {
1473 if !existing.contains(&v) {
1474 target.push(v);
1475 }
1476 }
1477 } else {
1478 target.extend(values);
1479 }
1480 exec.set_array(name.clone(), target);
1481 if let Some((scalar_name, sep)) = tied_scalar {
1482 let joined = exec
1483 .array(&name)
1484 .map(|a| a.join(&sep))
1485 .unwrap_or_default();
1486 exec.set_scalar(scalar_name.clone(), joined.clone());
1487 // Keep the env var (PATH / FPATH / MANPATH / …) in
1488 // sync with the scalar so child processes see the
1489 // change.
1490 std::env::set_var(&scalar_name, &joined);
1491 }
1492 });
1493 Value::Status(0)
1494 });
1495
1496 // `select var in words; do body; done` — interactive menu loop. Stack
1497 // discipline (top-down): sub_chunk_idx (Int), var_name (str), word_N..word_1.
1498 // Argc = words_count + 2. We pop in reverse order: idx first, then name,
1499 // then words back to source order via reverse().
1500 //
1501 // Loop body:
1502 // 1. Print numbered menu to stderr.
1503 // 2. Print PROMPT3 (default "?# ") to stderr.
1504 // 3. Read line from stdin.
1505 // 4. EOF (read fails) → break, return Status(0).
1506 // 5. Empty line → redraw menu, loop.
1507 // 6. Numeric input in 1..=N → set var, run sub-chunk, capture status,
1508 // redraw menu, loop.
1509 // 7. Anything else → set var to "" (zsh convention), run sub-chunk,
1510 // redraw menu, loop. The body sees REPLY = the raw input.
1511 //
1512 // `break` inside the body short-circuits via the sub-chunk's own bytecode
1513 // (the break_patches mechanism). When the sub-chunk halts via break it
1514 // returns from VM::run; we treat any non-zero status as "loop should
1515 // exit"? No — break sets a flag in the chunk-level patches. Since we're
1516 // running the body in a fresh VM each iteration, break needs a different
1517 // signaling mechanism. For now: the body's bytecode can do `return 99`
1518 // which we recognize as a "user wants out" signal. zsh's `break` works
1519 // in select via the same loop-control mechanism as for/while. Phase G6
1520 // follow-up.
1521 //WARNING FAKE AND MUST BE DELETED
1522 vm.register_builtin(BUILTIN_RUN_SELECT, |vm, argc| {
1523
1524 if argc < 2 {
1525 return Value::Status(1);
1526 }
1527 let n = argc as usize;
1528 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(n);
1529 for _ in 0..n {
1530 popped.push(vm.pop());
1531 }
1532 // popped: [sub_idx, name, word_N, ..., word_1] (popping from top)
1533 let sub_idx_val = popped.remove(0);
1534 let name_val = popped.remove(0);
1535 let mut words: Vec<String> = popped.into_iter().rev().map(|v| v.to_str()).collect();
1536 // Flatten any Value::Array elements (e.g. `select x in $arr; ...`).
1537 let mut flat = Vec::with_capacity(words.len());
1538 for w in words.drain(..) {
1539 // The pop above already to_str()'d, so Array splice is lost. Re-
1540 // pop wouldn't help — the host receives flat strings here. This is
1541 // OK for now since the compile path uses ARRAY_FLATTEN-equivalent
1542 // reasoning before the call. If splice support is needed, the
1543 // compile path should call BUILTIN_ARRAY_FLATTEN first.
1544 flat.push(w);
1545 }
1546 let words = flat;
1547
1548 let sub_idx = sub_idx_val.to_int() as usize;
1549 let name = name_val.to_str();
1550 let chunk = match vm.chunk.sub_chunks.get(sub_idx).cloned() {
1551 Some(c) => c,
1552 None => return Value::Status(1),
1553 };
1554
1555 let prompt = with_executor(|exec| {
1556 exec.scalar("PROMPT3")
1557 .unwrap_or_else(|| "?# ".to_string())
1558 });
1559
1560 let stdin = std::io::stdin();
1561 let mut reader = stdin.lock();
1562 let mut last_status: i32 = 0;
1563
1564 loop {
1565 // Direct port of zsh's selectlist from
1566 // src/zsh/Src/loop.c:347-409. Layout is column-major
1567 // ("down columns, then across") — NOT row-major. With
1568 // 6 items in 3 cols zsh produces:
1569 // 1 3 5
1570 // 2 4 6
1571 // The previous Rust impl walked row-major which
1572 // produced 1 2 3 / 4 5 6 (visually similar but wrong
1573 // for prompts that mention ordering and breaks scripts
1574 // that rely on column count == ceil(N/rows)).
1575 //
1576 // C variable mapping:
1577 // ct -> word count (n)
1578 // longest -> max item width + 1, then plus digits-of-ct
1579 // fct -> column count
1580 // fw -> per-column width
1581 // colsz -> row count = ceil(ct / fct)
1582 // t1 -> row index, walks 0..colsz
1583 // ap -> item pointer; advances by colsz to step
1584 // DOWN a column.
1585 let term_width: usize = std::env::var("COLUMNS")
1586 .ok()
1587 .and_then(|v| v.parse().ok())
1588 .unwrap_or(80);
1589 let ct = words.len();
1590 // loop.c:354-363 — find longest item width.
1591 let mut longest = 1usize;
1592 for w in &words {
1593 let aplen = w.chars().count();
1594 if aplen > longest {
1595 longest = aplen;
1596 }
1597 }
1598 // loop.c:365-367 — `longest++` then add digits of `ct`.
1599 longest += 1;
1600 let mut t0 = ct;
1601 while t0 > 0 {
1602 t0 /= 10;
1603 longest += 1;
1604 }
1605 // loop.c:369-373 — fct = (cols - 1) / (longest + 3); if
1606 // 0, fct = 1; else fw = (cols - 1) / fct.
1607 let raw_fct = (term_width.saturating_sub(1)) / (longest + 3);
1608 let (fct, fw) = if raw_fct == 0 {
1609 (1, longest + 3)
1610 } else {
1611 (raw_fct, (term_width.saturating_sub(1)) / raw_fct)
1612 };
1613 // loop.c:374 — colsz = (ct + fct - 1) / fct.
1614 let colsz = ct.div_ceil(fct);
1615 // loop.c:375-395 — for each row t1, walk down columns.
1616 for t1 in 0..colsz {
1617 let mut ap_idx = t1;
1618 while ap_idx < ct {
1619 let w = &words[ap_idx];
1620 let n = ap_idx + 1;
1621 let _ = write!(std::io::stderr(), "{}) {}", n, w);
1622 let mut t2 = w.chars().count() + 2;
1623 let mut t3 = n;
1624 while t3 > 0 {
1625 t2 += 1;
1626 t3 /= 10;
1627 }
1628 // Pad to fw (loop.c:389-390).
1629 while t2 < fw {
1630 let _ = write!(std::io::stderr(), " ");
1631 t2 += 1;
1632 }
1633 ap_idx += colsz;
1634 }
1635 let _ = writeln!(std::io::stderr());
1636 }
1637 let _ = write!(std::io::stderr(), "{}", prompt);
1638 let _ = std::io::stderr().flush();
1639
1640 let mut line = String::new();
1641 match reader.read_line(&mut line) {
1642 Ok(0) => break, // EOF
1643 Ok(_) => {}
1644 Err(_) => break,
1645 }
1646 let trimmed = line.trim_end_matches(['\n', '\r'][..].as_ref()).to_string();
1647
1648 with_executor(|exec| {
1649 exec.set_scalar("REPLY".to_string(), trimmed.clone());
1650 });
1651
1652 if trimmed.is_empty() {
1653 // Empty input → redraw menu without running body.
1654 continue;
1655 }
1656
1657 let chosen = match trimmed.parse::<usize>() {
1658 Ok(n) if n >= 1 && n <= words.len() => words[n - 1].clone(),
1659 _ => String::new(),
1660 };
1661
1662 with_executor(|exec| {
1663 exec.set_scalar(name.clone(), chosen);
1664 });
1665
1666 // Reset the loop signal before running the body so a stale
1667 // value from a sibling construct doesn't leak in.
1668 with_executor(|exec| exec.loop_signal = None);
1669
1670 let mut body_vm = fusevm::VM::new(chunk.clone());
1671 register_builtins(&mut body_vm);
1672 let _ = body_vm.run();
1673 last_status = body_vm.last_status;
1674
1675 // Drain the cross-VM loop-control signal. `break` from inside
1676 // the body sets LoopSignal::Break; `continue` sets Continue.
1677 // The legacy `BREAK_SELECT=1` env-var sentinel is still honored
1678 // for backward compat with scripts written before the keyword
1679 // path landed.
1680 let signal = with_executor(|exec| exec.loop_signal.take());
1681 let break_legacy = with_executor(|exec| {
1682 let v = exec.scalar("BREAK_SELECT");
1683 exec.unset_scalar("BREAK_SELECT");
1684 v.map(|s| s != "0" && !s.is_empty()).unwrap_or(false)
1685 });
1686 match signal {
1687 Some(LoopSignal::Break) => break,
1688 Some(LoopSignal::Continue) => continue,
1689 None if break_legacy => break,
1690 None => {}
1691 }
1692 }
1693
1694 Value::Status(last_status)
1695 });
1696
1697 // Magic special-parameter assoc lookup. Synthesizes values from
1698 // shell state for zsh's shell-introspection assocs:
1699 // commands, aliases, galiases, saliases, dis_aliases, dis_galiases,
1700 // dis_saliases, functions, dis_functions, builtins, dis_builtins,
1701 // reswords, options, parameters, jobtexts, jobdirs, jobstates,
1702 // nameddirs, userdirs, modules.
1703 // Returns None if `name` isn't a recognized magic name.
1704 //WARNING FAKE AND MUST BE DELETED
1705 fn magic_assoc_lookup(name: &str, idx: &str) -> Option<Value> {
1706 // Subscript-flag lookup `(r)pat` / `(R)pat` / `(i)pat` /
1707 // `(I)pat` on a magic-assoc — synthesize the (key,value)
1708 // pair list from get_special_array_value and route through
1709 // the assoc-flag matcher (same path real assocs use).
1710 // Direct port of Src/params.c getarg's hash-aware index/
1711 // match handling — without this, `${aliases[(I)foo*]}` and
1712 // friends were passing the literal `(I)foo*` text through
1713 // as the key.
1714 // Magic-assoc subscript flags (I)/(R)/(i)/(r): parse the
1715 // leading `(...)` flag tag and dispatch by-key (I/i) or
1716 // by-value (R/r) glob match. Capital = return all matches
1717 // joined by space; lowercase = return first only.
1718 // Direct port of Src/params.c getarg path which routes
1719 // hash subscripts through pattern matching when the flag
1720 // tag is present.
1721 let parsed_flags: Option<(String, String)> = (|s: &str| {
1722 let s = s.trim_start();
1723 let rest = s.strip_prefix('(')?;
1724 let close = rest.find(')')?;
1725 let flags = rest[..close].to_string();
1726 let pat = rest[close + 1..].to_string();
1727 if flags.chars().next().is_some_and(|c| matches!(c, 'I' | 'R' | 'i' | 'r' | 'k' | 'K' | 'n' | 'e' | 'b' | 'w' | 'f' | 'p' | 's')) {
1728 Some((flags, pat))
1729 } else { None }
1730 })(idx);
1731 if let Some((flags, pat)) = parsed_flags.clone() {
1732 let pairs = with_executor(|exec| -> Option<Vec<(String, String)>> {
1733 let keys = crate::exec::scan_magic_assoc_keys(name)?;
1734 Some(keys
1735 .into_iter()
1736 .map(|k| {
1737 let v = exec
1738 .get_special_array_value(name, &k)
1739 .unwrap_or_default();
1740 (k, v)
1741 })
1742 .collect())
1743 });
1744 if let Some(pairs) = pairs {
1745 let by_key = flags.contains('I') || flags.contains('i');
1746 let return_all = flags.contains('I') || flags.contains('R');
1747 let mut out: Vec<String> = Vec::new();
1748 for (k, v) in &pairs {
1749 let hay = if by_key { k } else { v };
1750 if crate::exec::glob_match_static(hay, &pat) {
1751 out.push(if by_key { k.clone() } else { v.clone() });
1752 if !return_all { break; }
1753 }
1754 }
1755 return Some(Value::str(out.join(" ")));
1756 }
1757 }
1758 with_executor(|exec| -> Option<Value> {
1759 match name {
1760 "commands" => {
1761 // Canonical command-hash is `cmdnamtab` — HASHED
1762 // entries store their resolved path in `cmd`.
1763 let tab = crate::ported::hashtable::cmdnamtab_lock();
1764 if idx == "@" || idx == "*" {
1765 return Some(Value::Array(
1766 tab.read().ok()
1767 .map(|t| t.iter()
1768 .filter_map(|(_, c)| c.cmd.clone())
1769 .map(Value::str)
1770 .collect())
1771 .unwrap_or_default(),
1772 ));
1773 }
1774 Some(Value::str(
1775 tab.read().ok()
1776 .and_then(|t| t.get_full_path(idx).map(|p| p.display().to_string()))
1777 .unwrap_or_else(|| {
1778 // Fall back to PATH scan for first match
1779 for dir in env::var("PATH").unwrap_or_default().split(':') {
1780 let p = std::path::PathBuf::from(dir).join(idx);
1781 if p.is_file() {
1782 return p.to_string_lossy().into_owned();
1783 }
1784 }
1785 String::new()
1786 }),
1787 ))
1788 }
1789 "aliases" | "galiases" | "saliases" => Some(Value::str(
1790 exec.get_special_array_value(name, idx).unwrap_or_default(),
1791 )),
1792 "functions" => {
1793 if let Some(text) = exec.function_definition_text(idx) {
1794 // zsh's `$functions[name]` returns the function
1795 // body with each statement on its own line and a
1796 // leading TAB on every line (no trailing `;`).
1797 // Was returning the raw user-typed source which
1798 // diverges on indent and terminator. Direct port
1799 // of Src/exec.c's `getfn_functions` formatter.
1800 let formatted = FuncBodyFmt::render(text.trim());
1801 Some(Value::str(format!("\t{}", formatted)))
1802 } else {
1803 Some(Value::str(""))
1804 }
1805 }
1806 "dis_functions" => {
1807 // Disabled functions table — zshrs tracks via autoload_pending
1808 // for the autoload-but-not-loaded case; full disable list
1809 // would need a separate table. For now: empty unless
1810 // explicitly disabled.
1811 Some(Value::str(""))
1812 }
1813 "builtins" => {
1814 // Return "defined" for known builtins; empty for unknown
1815 let known = matches!(
1816 idx,
1817 "echo"
1818 | "print"
1819 | "printf"
1820 | "cd"
1821 | "pwd"
1822 | "exit"
1823 | "return"
1824 | "true"
1825 | "false"
1826 | ":"
1827 | "test"
1828 | "["
1829 | "local"
1830 | "private"
1831 | "declare"
1832 | "typeset"
1833 | "read"
1834 | "shift"
1835 | "eval"
1836 | "alias"
1837 | "unalias"
1838 | "set"
1839 | "unset"
1840 | "export"
1841 | "source"
1842 | "."
1843 | "history"
1844 | "fc"
1845 | "jobs"
1846 | "fg"
1847 | "bg"
1848 | "kill"
1849 | "wait"
1850 | "trap"
1851 | "ulimit"
1852 | "umask"
1853 | "hash"
1854 | "unhash"
1855 | "type"
1856 | "whence"
1857 | "which"
1858 | "where"
1859 | "command"
1860 | "builtin"
1861 | "exec"
1862 | "getopts"
1863 | "let"
1864 | "setopt"
1865 | "unsetopt"
1866 | "emulate"
1867 | "zstyle"
1868 | "compdef"
1869 | "compadd"
1870 | "compinit"
1871 | "compset"
1872 );
1873 if known {
1874 Some(Value::str("defined"))
1875 } else {
1876 Some(Value::str(""))
1877 }
1878 }
1879 "reswords" => {
1880 let known = matches!(
1881 idx,
1882 "if" | "then"
1883 | "elif"
1884 | "else"
1885 | "fi"
1886 | "for"
1887 | "do"
1888 | "done"
1889 | "while"
1890 | "until"
1891 | "case"
1892 | "esac"
1893 | "in"
1894 | "function"
1895 | "select"
1896 | "time"
1897 | "{"
1898 | "}"
1899 | "[["
1900 | "]]"
1901 | "!"
1902 | "coproc"
1903 | "always"
1904 | "foreach"
1905 | "end"
1906 | "repeat"
1907 | "nocorrect"
1908 | "noglob"
1909 | "declare"
1910 | "typeset"
1911 | "local"
1912 | "readonly"
1913 | "export"
1914 | "integer"
1915 | "float"
1916 );
1917 if known {
1918 Some(Value::str("reserved"))
1919 } else {
1920 Some(Value::str(""))
1921 }
1922 }
1923 "options" => {
1924 let opt_name = idx.to_lowercase().replace('_', "");
1925 Some(Value::str(
1926 if crate::ported::options::opt_state_get(&opt_name).unwrap_or(false) {
1927 "on"
1928 } else {
1929 "off"
1930 },
1931 ))
1932 }
1933 "parameters" => {
1934 // ${parameters[name]} returns the type with all
1935 // attributes joined by `-`. Delegates to
1936 // `get_special_array_value` which reads PM_TYPE
1937 // / PM_LOWER / PM_READONLY / etc. flags from the
1938 // canonical paramtab entry.
1939 Some(Value::str(
1940 exec.get_special_array_value("parameters", idx)
1941 .unwrap_or_default()))
1942 }
1943 "jobtexts" => {
1944 let job_id: usize = idx.parse().ok()?;
1945 Some(Value::str(
1946 exec.jobs
1947 .get(job_id)
1948 .map(|j| j.command.clone())
1949 .unwrap_or_default(),
1950 ))
1951 }
1952 "jobdirs" => {
1953 let _job_id: usize = idx.parse().ok()?;
1954 // Per-job working dir not tracked; return current cwd as
1955 // a useful approximation (zsh tracks it; we don't yet).
1956 Some(Value::str(
1957 std::env::current_dir()
1958 .ok()
1959 .and_then(|p| p.to_str().map(String::from))
1960 .unwrap_or_default(),
1961 ))
1962 }
1963 "jobstates" => {
1964 let job_id: usize = idx.parse().ok()?;
1965 Some(Value::str(
1966 exec.jobs
1967 .get(job_id)
1968 .map(|j| match j.state {
1969 JobState::Running => "running".to_string(),
1970 JobState::Stopped => "stopped".to_string(),
1971 JobState::Done => "done".to_string(),
1972 })
1973 .unwrap_or_default(),
1974 ))
1975 }
1976 "nameddirs" => Some(Value::str(
1977 crate::ported::hashnameddir::nameddirtab()
1978 .lock().ok()
1979 .and_then(|g| g.get(idx).map(|nd| nd.dir.clone()))
1980 .unwrap_or_default(),
1981 )),
1982 //WARNING FAKE AND MUST BE DELETED
1983 "userdirs" => {
1984 // ~user → home dir lookup via /etc/passwd. No caching;
1985 // each lookup hits getpwnam.
1986 let c_user = match std::ffi::CString::new(idx) {
1987 Ok(c) => c,
1988 Err(_) => return Some(Value::str("")),
1989 };
1990 let pw = unsafe { libc::getpwnam(c_user.as_ptr()) };
1991 if pw.is_null() {
1992 Some(Value::str(""))
1993 } else {
1994 let home_ptr = unsafe { (*pw).pw_dir };
1995 if home_ptr.is_null() {
1996 return Some(Value::str(""));
1997 }
1998 let home = unsafe { std::ffi::CStr::from_ptr(home_ptr) };
1999 Some(Value::str(home.to_string_lossy().into_owned()))
2000 }
2001 }
2002 "modules" => {
2003 // Loaded modules — compiled-in always-loaded plus
2004 // anything zmodload registered via the
2005 // `_module_<name>` option flag (see
2006 // bin_zmodload). Same source as the
2007 // magic_assoc_lookup path so both `${modules[X]}`
2008 // and `${(t)modules[X]}` agree.
2009 const ALWAYS_LOADED: &[&str] = &[
2010 "zsh/datetime",
2011 "zsh/sched",
2012 "zsh/zutil",
2013 "zsh/parameter",
2014 "zsh/files",
2015 "zsh/complete",
2016 "zsh/complist",
2017 "zsh/regex",
2018 "zsh/system",
2019 "zsh/stat",
2020 "zsh/net/tcp",
2021 "zsh/net/socket",
2022 "zsh/private",
2023 "zsh/zftp",
2024 "zsh/zselect",
2025 "zsh/zle",
2026 "zsh/random",
2027 "zsh/pcre",
2028 "zsh/db/gdbm",
2029 "zsh/cap",
2030 "zsh/clone",
2031 "zsh/curses",
2032 "zsh/mapfile",
2033 "zsh/nearcolor",
2034 "zsh/newuser",
2035 "zsh/mathfunc",
2036 "zsh/termcap",
2037 "zsh/terminfo",
2038 "zsh/profiler",
2039 ];
2040 let loaded = ALWAYS_LOADED.contains(&idx)
2041 || crate::ported::options::opt_state_get(&format!("_module_{}", idx))
2042 .unwrap_or(false);
2043 Some(Value::str(if loaded { "loaded" } else { "" }))
2044 }
2045 //WARNING FAKE AND MUST BE DELETED
2046 "patchars" => Some(Value::str("*?[]<>(){}|^&;")),
2047 "widgets" => {
2048 // ${widgets[name]} → 'builtin' or 'user:func' per
2049 // zleparameter.c widgets_*. Mirrors the
2050 // magic_assoc_lookup path so both lookup sites
2051 // agree.
2052 if let Some(target) = getwidgettarget(idx) {
2053 if target == idx {
2054 Some(Value::str("builtin"))
2055 } else {
2056 Some(Value::str(format!("user:{}", target)))
2057 }
2058 } else {
2059 Some(Value::str(""))
2060 }
2061 }
2062 "keymaps" => {
2063 // ${keymaps[name]} → "1" or "" per zleparameter.c
2064 // keymaps_*. Same canonical seven names as the
2065 // magic_assoc path.
2066 let known = matches!(
2067 idx,
2068 "main" | "emacs" | "viins" | "vicmd" | "isearch" | "command" | "menuselect"
2069 );
2070 if known {
2071 Some(Value::str("1"))
2072 } else {
2073 Some(Value::str(""))
2074 }
2075 }
2076 "mapfile" => {
2077 // zsh/mapfile module: `${mapfile[/path]}` reads a
2078 // file's bytes verbatim. Trailing newline is
2079 // preserved (verified against real zsh: a one-line
2080 // "test\n" file gives len=5, not 4). Downstream
2081 // (f)/(@f) flags handle the trailing-newline split.
2082 if idx == "@" || idx == "*" {
2083 // Splice: not meaningful for mapfile (the whole
2084 // filesystem isn't enumerable). Return empty.
2085 return Some(Value::Array(vec![]));
2086 }
2087 match std::fs::read_to_string(idx) {
2088 Ok(s) => Some(Value::str(s)),
2089 Err(_) => Some(Value::str("")),
2090 }
2091 }
2092 "sysparams" => {
2093 // zsh/system module: `${sysparams[KEY]}` magic
2094 // assoc with three keys per zshmodules(1): `pid`,
2095 // `ppid`, `procsubstpid`. Returns the appropriate
2096 // process ID. Splice form returns the value list.
2097 let pid_str = std::process::id().to_string();
2098 let ppid_str = unsafe { libc::getppid() }.to_string();
2099 if idx == "@" || idx == "*" {
2100 return Some(Value::Array(vec![
2101 Value::str(pid_str),
2102 Value::str(ppid_str),
2103 ]));
2104 }
2105 match idx {
2106 "pid" => Some(Value::str(pid_str)),
2107 "ppid" => Some(Value::str(ppid_str)),
2108 "procsubstpid" => Some(Value::str("0")),
2109 _ => Some(Value::str("")),
2110 }
2111 }
2112 "epochtime" => {
2113 // zsh/datetime — `${epochtime}` is a 2-element
2114 // indexed array: [seconds, nanoseconds] from
2115 // clock_gettime(CLOCK_REALTIME). Direct port of
2116 // the `epochtimegetfn` accessor in
2117 // Src/Modules/datetime.c (struct gsu_array).
2118 let (secs, nsecs) = SystemTime::now()
2119 .duration_since(UNIX_EPOCH)
2120 .map(|d| (d.as_secs() as i64, d.subsec_nanos() as i64))
2121 .unwrap_or((0, 0));
2122 if idx == "@" || idx == "*" {
2123 return Some(Value::Array(vec![
2124 Value::str(secs.to_string()),
2125 Value::str(nsecs.to_string()),
2126 ]));
2127 }
2128 if let Ok(n) = idx.parse::<i64>() {
2129 let pos = if n > 0 {
2130 (n - 1) as usize
2131 } else if n < 0 {
2132 let p = 2 + n;
2133 if p < 0 {
2134 return Some(Value::str(""));
2135 }
2136 p as usize
2137 } else {
2138 return Some(Value::str(""));
2139 };
2140 return match pos {
2141 0 => Some(Value::str(secs.to_string())),
2142 1 => Some(Value::str(nsecs.to_string())),
2143 _ => Some(Value::str("")),
2144 };
2145 }
2146 Some(Value::str(""))
2147 }
2148 "termcap" => {
2149 // `${termcap[cap]}` — direct port of
2150 // `gettermcap()` from Src/Modules/termcap.c:144.
2151 // Backed by ncurses' termcap-emulation API
2152 // (`tgetent`/`tgetstr`/`tgetnum`/`tgetflag`)
2153 // which resolves from the same database
2154 // `${terminfo[…]}` uses but with the legacy
2155 // 2-letter cap names.
2156 Some(Value::str(
2157 crate::modules::termcap::gettermcap(idx).unwrap_or_default(),
2158 ))
2159 }
2160 "terminfo" => {
2161 // `${terminfo[capname]}` — direct port of
2162 // `getterminfo()` from Src/Modules/terminfo.c:135.
2163 // Lazy ncurses tigetstr/tigetnum/tigetflag lookup
2164 // for any capability the script names. The
2165 // executor also pre-seeds the common subset
2166 // into `assoc_arrays["terminfo"]` so
2167 // `${(k)terminfo}` enumerates the seeded names —
2168 // but the magic-assoc path runs FIRST (per the
2169 // `user_defined_assoc` gate at line 3108), so
2170 // for INDEX lookups we always reach `lookup()`
2171 // and uncommon caps like `bel` resolve correctly.
2172 Some(Value::str(
2173 crate::modules::terminfo::getterminfo(idx).unwrap_or_default(),
2174 ))
2175 }
2176 "errnos" => {
2177 // zsh/system module: `${errnos[N]}` is an INDEXED
2178 // array of errno-name strings, 1-based. Direct
2179 // port of the `SPECIALPMDEF("errnos", PM_ARRAY|
2180 // PM_READONLY, …)` entry at
2181 // Src/Modules/system.c:902 + the `errnosgetfn`
2182 // accessor at line 832 (which returns
2183 // `arrdup((char **)sys_errnames)`). Splice (`@`/
2184 // `*`) returns the whole platform-specific list
2185 // as a Value::Array; numeric subscript returns
2186 // the matching name (or "" for unknown).
2187 let table = crate::modules::system::ERRNO_NAMES;
2188 if idx == "@" || idx == "*" {
2189 return Some(Value::Array(
2190 table.iter().map(|(n, _)| Value::str(*n)).collect(),
2191 ));
2192 }
2193 if let Ok(n) = idx.parse::<i64>() {
2194 // 1-based. Negative indices count from end.
2195 let len = table.len() as i64;
2196 let pos = if n > 0 {
2197 (n - 1) as usize
2198 } else if n < 0 {
2199 let p = len + n;
2200 if p < 0 {
2201 return Some(Value::str(""));
2202 }
2203 p as usize
2204 } else {
2205 return Some(Value::str(""));
2206 };
2207 if let Some((name, _)) = table.get(pos) {
2208 return Some(Value::str(*name));
2209 }
2210 }
2211 Some(Value::str(""))
2212 }
2213 // `langinfo` — port of zsh/langinfo module
2214 // (src/zsh/Src/Modules/langinfo.c:402-449). Read-
2215 // only assoc keyed by nl_item names (CODESET,
2216 // D_FMT, RADIXCHAR, etc.); each lookup goes through
2217 // nl_langinfo(3). Splice (`@`/`*`) returns all the
2218 // names known to the module's static table.
2219 "langinfo" => {
2220 if idx == "@" || idx == "*" {
2221 return Some(Value::Array(
2222 crate::langinfo::NL_NAMES
2223 .iter()
2224 .map(|s| Value::str(*s))
2225 .collect(),
2226 ));
2227 }
2228 let val = crate::langinfo::getlanginfo(idx).unwrap_or_default();
2229 Some(Value::str(val))
2230 }
2231 // `.zle.esc` and `.zle.sgr` — port of zsh/hlgroup
2232 // module (src/zsh/Src/Modules/hlgroup.c:81-165).
2233 // Both back into the user's `.zle.hlgroups` assoc.
2234 // `.zle.esc[name]` returns the FULL escape sequence
2235 // for the highlight-group; `.zle.sgr[name]` returns
2236 // just the digit run (after stripping `\033[` and
2237 // trailing `m`). hlgroup.c:39-78 convertattr does
2238 // both modes.
2239 ".zle.esc" | ".zle.sgr" => {
2240 let sgr = name == ".zle.sgr";
2241 // Look up `.zle.hlgroups[idx]` — the user's
2242 // attribute string per hlgroup.c:96-99 (var =
2243 // GROUPVAR i.e. ".zle.hlgroups").
2244 let attr = exec
2245 .assoc(".zle.hlgroups")
2246 .and_then(|m| m.get(idx).cloned())
2247 .unwrap_or_default();
2248 if attr.is_empty() {
2249 // Per hlgroup.c:101-103, missing/unset entry
2250 // returns an empty string (PM_UNSET).
2251 return Some(Value::str(""));
2252 }
2253 let converted = crate::hlgroup::convertattr(&attr, sgr);
2254 Some(Value::str(converted))
2255 }
2256 _ => None,
2257 }
2258 })
2259 }
2260
2261 // `${arr[idx]}` — pop name, then idx_str. zsh is 1-based for positive
2262 // indices; we honor that. `@`/`*` return the whole array as Value::Array
2263 // so Op::Exec splice produces N argv slots. For `${foo[key]}` where foo
2264 // is an assoc, the idx is a string key — we check assoc_arrays first
2265 // when the idx isn't `@`/`*` and the name has an assoc binding.
2266 // WARNING FAKE AND MUST BE DELETED
2267 vm.register_builtin(BUILTIN_ARRAY_INDEX, |vm, _argc| {
2268 let mut idx = vm.pop().to_str();
2269 let name = vm.pop().to_str();
2270 // `\u{02}` prefix on idx = "compile-time DQ context" — set by
2271 // the compile_zsh fast path when the ${arr[KEY]} appeared
2272 // inside `"…"`. The runtime needs this to decide whether
2273 // a `[N,M]` range slice should join (DQ) or stay as array
2274 // (unquoted). The mode-1 BUILTIN_EXPAND_TEXT bridge already
2275 // bumps `exec.in_dq_context`, so detect either signal.
2276 let dq_compile = idx.starts_with('\u{02}');
2277 if dq_compile {
2278 idx = idx[1..].to_string();
2279 }
2280 // `\u{05}` prefix on idx = "(@) flag is set in surrounding
2281 // flag chain" — emitted by parse_zsh_flag_subscript when the
2282 // outer flag chain contains `@`. Direct port of zsh's
2283 // nojoin behavior: `(@)` overrides the DQ-join even inside
2284 // `"…"`. When this sentinel is present, force array shape
2285 // for slices regardless of in_dq_context.
2286 let force_array = idx.starts_with('\u{05}');
2287 if force_array {
2288 idx = idx[1..].to_string();
2289 }
2290 // `\u{06}` prefix = "outer (v) flag wants values for matching
2291 // assoc keys" — flip the (I)/(i) subscript-flag from
2292 // returning keys to returning the corresponding values.
2293 // Direct port of zsh's (v)+(I) combo.
2294 let flip_to_values = idx.starts_with('\u{06}');
2295 if flip_to_values {
2296 idx = idx[1..].to_string();
2297 }
2298 // `\u{07}` prefix = "outer (k) flag wants keys for matching
2299 // assoc values" — flip the (R)/(r) subscript-flag from
2300 // returning values to returning the corresponding keys.
2301 let flip_to_keys = idx.starts_with('\u{07}');
2302 if flip_to_keys {
2303 idx = idx[1..].to_string();
2304 }
2305 // Pre-expand `$((arith))` / `$VAR` / `$(cmd)` references in
2306 // the subscript text so downstream slice / index logic sees
2307 // numeric literals it can parse. The compile path passes the
2308 // raw subscript text as a constant; without expansion, a key
2309 // like `$((1+1)),-1` failed `parse::<i64>()` for the lower
2310 // bound and the whole slice fell back to scalar concat.
2311 // Special-flag keys `(I)pat` / `(R)pat` skip this — those
2312 // already get their `$VAR` resolution inside the matchers.
2313 if idx.contains('$')
2314 && !idx.starts_with("(I)")
2315 && !idx.starts_with("(i)")
2316 && !idx.starts_with("(R)")
2317 && !idx.starts_with("(r)")
2318 && !idx.starts_with("(K)")
2319 && !idx.starts_with("(k)")
2320 {
2321 idx = crate::ported::subst::singsub(&idx);
2322 }
2323 // `${pipestatus[N]}` / `${PIPESTATUS[N]}` — pipeline exit
2324 // status array. Populated by BUILTIN_PIPELINE_EXEC after a
2325 // real pipeline; for single commands fall back to a synthetic
2326 // [last_status] list so `true; echo $pipestatus[1]` prints 0.
2327 // After a non-pipeline command runs, the prior pipestatus
2328 // array becomes stale (zsh resets pipestatus to a single-
2329 // element array on every command). Detect by comparing the
2330 // last element to last_status; if they diverge, fall back
2331 // to the synthetic [last_status] form so e.g.
2332 // true | false; echo "$?"; echo "$pipestatus"
2333 // prints "0" (just the echo's status), not "0 1".
2334 if name == "pipestatus" || name == "PIPESTATUS" {
2335 let arr = with_executor(|exec| {
2336 let cached = exec.array(&name);
2337 let last = exec.last_status().to_string();
2338 match cached {
2339 Some(arr)
2340 if arr.last().map(|s| s.as_str()) == Some(last.as_str()) =>
2341 {
2342 arr
2343 }
2344 _ => vec![last],
2345 }
2346 });
2347 if let Ok(i) = idx.parse::<i64>() {
2348 let len = arr.len() as i64;
2349 let resolved = if i > 0 {
2350 (i - 1) as usize
2351 } else if i < 0 {
2352 let off = len + i;
2353 if off < 0 {
2354 return Value::str("");
2355 }
2356 off as usize
2357 } else {
2358 return Value::str("");
2359 };
2360 return Value::str(arr.get(resolved).cloned().unwrap_or_default());
2361 }
2362 if idx == "@" || idx == "*" {
2363 return Value::Array(arr.into_iter().map(Value::str).collect());
2364 }
2365 }
2366
2367 // Special-name positional-param indexing. `${@[N]}`, `${@[N,M]}`,
2368 // `${*[N]}`, `${argv[N]}` all index the positional-param array
2369 // 1-based (zsh semantics). Without this, `@`/`*`/`argv` fall
2370 // through to the scalar-slice path which slices the joined
2371 // string instead.
2372 if matches!(name.as_str(), "@" | "*" | "argv") {
2373 let arr = with_executor(|exec| exec.pparams());
2374 // Slice form `N,M`.
2375 if let Some((s_str, e_str)) = idx.split_once(',') {
2376 let s_opt: Option<i64> = s_str.trim().parse().ok();
2377 let e_opt: Option<i64> = e_str.trim().parse().ok();
2378 if let (Some(s), Some(e)) = (s_opt, e_opt) {
2379 return Value::Array(
2380 getarrvalue(&arr, s, e)
2381 .into_iter()
2382 .map(Value::str)
2383 .collect(),
2384 );
2385 }
2386 }
2387 // Single index.
2388 if let Ok(i) = idx.parse::<i64>() {
2389 let len = arr.len() as i64;
2390 let resolved = if i > 0 {
2391 (i - 1) as usize
2392 } else if i < 0 {
2393 let off = len + i;
2394 if off < 0 {
2395 return Value::str("");
2396 }
2397 off as usize
2398 } else {
2399 return Value::str("");
2400 };
2401 return Value::str(arr.get(resolved).cloned().unwrap_or_default());
2402 }
2403 // Subscript-flag form on positional params: route through
2404 // getarg with positional_params as the array. Matches
2405 // zsh's `${@[(I)pat]}` / `${@[(r)pat]}` semantics.
2406 if idx.starts_with('(') {
2407 if let Some(crate::ported::params::getarg_out::Value(v)) =
2408 crate::ported::params::getarg(&idx, Some(&arr), None, None)
2409 {
2410 return v;
2411 }
2412 }
2413 }
2414 // Magic special-parameter assoc lookups — synthesized from shell
2415 // state on access. zsh exposes shell-introspection assocs like
2416 // `${commands[ls]}`, `${aliases[ll]}`, `${functions[foo]}`,
2417 // `${options[interactive]}`, etc. None of these are stored in
2418 // `assoc_arrays`; we generate the value at lookup time.
2419 //
2420 // BUT: if the user declared `typeset -A NAME` and assigned
2421 // values, their declaration wins. This matches zsh's actual
2422 // module behavior (verified against /bin/zsh): `typeset -A
2423 // langinfo; langinfo[CODESET]=UTF-8; echo $langinfo[CODESET]`
2424 // prints `UTF-8` even though `zsh/langinfo` would normally
2425 // shadow it with nl_langinfo(3). The C source enforces this
2426 // via the module loader: `bin_zmodload` only registers the
2427 // special-parameter table entry when no existing assoc with
2428 // that name exists. Mirroring: skip the magic path if
2429 // `name` is already in `assoc_arrays`.
2430 let user_defined_assoc =
2431 with_executor(|exec| exec.assoc(&name).is_some());
2432 if !user_defined_assoc {
2433 if let Some(v) = magic_assoc_lookup(&name, &idx) {
2434 // Magic-assoc with `(I)pat` glob-match returned an
2435 // Array of matching keys. In DQ context (the user
2436 // wrote `"${aliases[(I)foo*]}"`), zsh joins array
2437 // results with the first IFS char per Src/subst.c
2438 // paramsubst's `nojoin` gating. Without this the
2439 // outer DQ-string was treating the array as a
2440 // splice and emitting one arg per matching key.
2441 if dq_compile {
2442 if let Value::Array(items) = &v {
2443 let strs: Vec<String> =
2444 items.iter().map(|i| i.to_str()).collect();
2445 let sep = with_executor(|exec| {
2446 exec.scalar("IFS")
2447 .and_then(|s| s.chars().next())
2448 .unwrap_or(' ')
2449 });
2450 return Value::str(strs.join(&sep.to_string()));
2451 }
2452 }
2453 return v;
2454 }
2455 }
2456 with_executor(|exec| match idx.as_str() {
2457 "@" | "*" => {
2458 // Splice: assoc → values list (zsh's `${foo[@]}` for assoc);
2459 // indexed → element list. For assoc the order of values is
2460 // implementation-defined (matches HashMap iteration).
2461 if let Some(map) = exec.assoc(&name) {
2462 return Value::Array(map.values().map(Value::str).collect());
2463 }
2464 match exec.array(&name) {
2465 Some(v) => Value::Array(v.iter().map(Value::str).collect()),
2466 None => Value::Array(vec![]),
2467 }
2468 }
2469 _ => {
2470 // Magic-assoc lookup (`${aliases[gst]}`,
2471 // `${commands[ls]}`, etc.) — names backed by zsh's
2472 // parameter-module hashes (Src/Modules/parameter.c)
2473 // that don't live in `exec.assoc_arrays`. Direct
2474 // delegation to the canonical port reader.
2475 if crate::exec::scan_magic_assoc_keys(&name).is_some() {
2476 return Value::str(
2477 exec.get_special_array_value(&name, &idx).unwrap_or_default());
2478 }
2479 if let Some(map) = exec.assoc(&name) {
2480 if let Some((flags, pat)) = (|s: &str| -> Option<(String, String)> {
2481 // Port of subst.c subscript-flag parser:
2482 // `(I)pat` / `(R)pat` / `(i)pat` / `(r)pat`.
2483 // Returns (flags_chars, pattern_after).
2484 let s = s.trim_start();
2485 let rest = s.strip_prefix('(')?;
2486 let close = rest.find(')')?;
2487 let flags = rest[..close].to_string();
2488 let pat = rest[close + 1..].to_string();
2489 if flags.chars().next().is_some_and(|c| matches!(c, 'I' | 'R' | 'i' | 'r' | 'k' | 'K' | 'n' | 'e' | 'b' | 'w' | 'f' | 'p' | 's')) {
2490 Some((flags, pat))
2491 } else { None }
2492 })(&idx) {
2493 // (v)+(I)/(i): subscript searches keys but
2494 // outer wants values. Iterate the assoc and
2495 // return values for keys that match `pat`.
2496 if flip_to_values
2497 && (flags.contains('I') || flags.contains('i'))
2498 {
2499 let return_all = flags.contains('I');
2500 let mut out: Vec<String> = Vec::new();
2501 for (k, v) in map.iter() {
2502 if crate::exec::glob_match_static(k, &pat) {
2503 out.push(v.clone());
2504 if !return_all {
2505 break;
2506 }
2507 }
2508 }
2509 return Value::str(out.join(" "));
2510 }
2511 // (k)+(R)/(r): subscript searches values but
2512 // outer wants keys. Iterate the assoc and
2513 // return keys whose values match.
2514 if flip_to_keys
2515 && (flags.contains('R') || flags.contains('r'))
2516 {
2517 let return_all = flags.contains('R');
2518 let mut out: Vec<String> = Vec::new();
2519 for (k, v) in map.iter() {
2520 if crate::exec::glob_match_static(v, &pat) {
2521 out.push(k.clone());
2522 if !return_all {
2523 break;
2524 }
2525 }
2526 }
2527 return Value::str(out.join(" "));
2528 }
2529 // Default flag handling — route to getarg's
2530 // hash-search arm (params.c:1581-1660).
2531 match crate::ported::params::getarg(&idx, None, Some(&map), None) {
2532 Some(crate::ported::params::getarg_out::Value(v)) => return v,
2533 _ => {}
2534 }
2535 }
2536 return Value::str(map.get(&idx).cloned().unwrap_or_default());
2537 }
2538
2539 let arr = match exec.array(&name) {
2540 Some(a) => a,
2541 None => {
2542 // Fall back to scalar subscripting on `variables`.
2543 // zsh treats `${str[N]}` and `${str[N,M]}` as
2544 // 1-based char indexing. Subscript flags
2545 // `(w)`/`(s/sep/)` on scalars split before
2546 // indexing — direct port of zsh's
2547 // zshparam(1) "Subscript Flags" `w` and `s`.
2548 let scalar = exec.get_variable(&name);
2549 if scalar.is_empty() {
2550 return Value::str("");
2551 }
2552 // `(w)N` on scalar: split by IFS into words,
2553 // return the Nth (1-based). zsh's word
2554 // separator defaults to IFS whitespace.
2555 // `(s/sep/)` overrides the separator. zsh
2556 // also accepts `(ws[chars])` — `s` followed
2557 // by a `[chars]` set treated as IFS for this
2558 // operation.
2559 if let Some((flags, pat)) = (|s: &str| -> Option<(String, String)> {
2560 // Port of subst.c subscript-flag parser:
2561 // `(I)pat` / `(R)pat` / `(i)pat` / `(r)pat`.
2562 // Special-case `(s<delim>...<delim>)` per
2563 // params.c:1458-1476 — `s` introduces a
2564 // delimited separator block.
2565 // Returns (flags_chars, pattern_after).
2566 let s = s.trim_start();
2567 let rest = s.strip_prefix('(')?;
2568 let close = rest.find(')')?;
2569 let flags = rest[..close].to_string();
2570 let pat = rest[close + 1..].to_string();
2571 if flags.starts_with('s') {
2572 return Some((flags, pat));
2573 }
2574 if flags.chars().next().is_some_and(|c| matches!(c, 'I' | 'R' | 'i' | 'r' | 'k' | 'K' | 'n' | 'e' | 'b' | 'w' | 'f' | 'p' | 's')) {
2575 Some((flags, pat))
2576 } else { None }
2577 })(&idx) {
2578 if flags.contains('w') {
2579 if let Ok(n) = pat.parse::<i64>() {
2580 let words: Vec<&str> = scalar.split_whitespace().collect();
2581 let len = words.len() as i64;
2582 let i = if n > 0 {
2583 (n - 1) as usize
2584 } else if n < 0 {
2585 let off = len + n;
2586 if off < 0 {
2587 return Value::str("");
2588 }
2589 off as usize
2590 } else {
2591 return Value::str("");
2592 };
2593 return Value::str(
2594 words.get(i).map(|s| s.to_string()).unwrap_or_default(),
2595 );
2596 }
2597 }
2598 // `(s/sep/)N` is a NO-OP for scalar `[N]`
2599 // indexing — confirmed by testing zsh
2600 // (`a=hello; ${a[(s/l/)1]}` returns "h",
2601 // same as `${a[1]}`). The `(s)` flag
2602 // only affects splitting in word-list
2603 // contexts (`${(s/sep/)var}` without
2604 // index, or `[@]` form). Strip the
2605 // flag, parse the index normally, fall
2606 // through to char slicing.
2607 if flags.starts_with('s') {
2608 if let Ok(i) = pat.parse::<i64>() {
2609 let s_chars: Vec<String> = scalar.chars().map(|c| c.to_string()).collect();
2610 return Value::str(crate::ported::params::getarrvalue(&s_chars, i, i).concat());
2611 }
2612 }
2613 // (i)/(I)/(r)/(R) on scalar — route
2614 // through getarg's scalar char-search
2615 // arm (params.c:1798-1980). Faithful
2616 // port lives in src/ported/params.rs;
2617 // this branch defers to it to avoid
2618 // duplicated drift.
2619 if flags.chars().all(|c| matches!(c, 'i' | 'I' | 'r' | 'R' | 'e')) {
2620 let _ = &pat;
2621 if let Some(crate::ported::params::getarg_out::Value(v)) =
2622 crate::ported::params::getarg(&idx, None, None, Some(&scalar))
2623 {
2624 return v;
2625 }
2626 }
2627 }
2628 // Build a per-char pseudo-array and route slice/index
2629 // through getarrvalue so 1-based inclusive semantics
2630 // and negative-from-end indexing match
2631 // Src/params.c::getstrvalue's char-arm.
2632 let s_chars: Vec<String> = scalar.chars().map(|c| c.to_string()).collect();
2633 if let Some((start_s, end_s)) = idx.split_once(',') {
2634 let parse_one = |s: &str, exec: &mut ShellExecutor| -> Option<i64> {
2635 let t = s.trim();
2636 if t.is_empty() { return None; }
2637 if let Ok(i) = t.parse::<i64>() { return Some(i); }
2638 Some(crate::ported::math::mathevali(&crate::ported::subst::singsub(t)).unwrap_or(0))
2639 };
2640 let s_opt = parse_one(start_s, exec);
2641 let e_opt = parse_one(end_s, exec);
2642 let s_i = s_opt.unwrap_or(1);
2643 let e_i = e_opt.unwrap_or(s_chars.len() as i64);
2644 return Value::str(crate::ported::params::getarrvalue(&s_chars, s_i, e_i).concat());
2645 }
2646 let i = match idx.parse::<i64>() {
2647 Ok(i) => i,
2648 Err(_) => crate::ported::math::mathevali(&crate::ported::subst::singsub(&idx)).unwrap_or(0),
2649 };
2650 return Value::str(crate::ported::params::getarrvalue(&s_chars, i, i).concat());
2651 }
2652 };
2653
2654 // Subscript flag form: (r)pat / (R)pat / (i)pat / (I)pat
2655 // / (e)str / (n:N:)pat. Returns first/last matching value
2656 // or first/last matching index per zsh semantics.
2657 if let Some((flags, pat)) = (|s: &str| -> Option<(String, String)> {
2658 let s = s.trim_start();
2659 let rest = s.strip_prefix('(')?;
2660 let close = rest.find(')')?;
2661 let flags = rest[..close].to_string();
2662 let pat = rest[close + 1..].to_string();
2663 if flags.chars().next().is_some_and(|c| matches!(c, 'I' | 'R' | 'i' | 'r' | 'k' | 'K' | 'n' | 'e' | 'b' | 'w' | 'f' | 'p' | 's')) {
2664 Some((flags, pat))
2665 } else { None }
2666 })(&idx) {
2667 // Route to getarg's array-search arm
2668 // (params.c:1672-1719).
2669 let _ = (&flags, &pat); // silence unused if any
2670 match crate::ported::params::getarg(&idx, Some(&arr), None, None) {
2671 Some(crate::ported::params::getarg_out::Value(v)) => return v,
2672 _ => {}
2673 }
2674 return Value::str("");
2675 }
2676
2677 // Slice form `N,M`: comma separator with int-or-arith
2678 // operands on each side. Negative indices count from
2679 // end. Direct port of zsh's getindex() N,M slice.
2680 //
2681 // Return shape depends on context: in DQ (`"${arr[2,4]}"`)
2682 // zsh joins the slice with the first IFS char into a
2683 // single scalar (Src/subst.c sepjoin path with nojoin=0);
2684 // in unquoted (`${arr[2,4]}`) or `[@]`-style context it
2685 // remains an array. Detect via in_dq_context which the
2686 // BUILTIN_EXPAND_TEXT mode-1 wrapper bumps.
2687 if let Some((start_s, end_s)) = idx.split_once(',') {
2688 // Inline subscript-int parse — mirrors getarg's
2689 // mathevalarg fallback (params.c:1567).
2690 let parse_one = |s: &str, exec: &mut ShellExecutor| -> Option<i64> {
2691 let t = s.trim();
2692 if t.is_empty() { return None; }
2693 if let Ok(i) = t.parse::<i64>() { return Some(i); }
2694 Some(crate::ported::math::mathevali(&crate::ported::subst::singsub(t)).unwrap_or(0))
2695 };
2696 let start = parse_one(start_s, exec);
2697 let end = parse_one(end_s, exec);
2698 if let (Some(s), Some(e)) = (start, end) {
2699 // KSH_ARRAYS: indices are 0-based, so shift
2700 // positive values up by 1 before the (1-based)
2701 // slicer runs. zsh: `setopt ksh_arrays;
2702 // a=(a b c d); echo $a[1,2]` → `b c`.
2703 let ksh = crate::ported::options::opt_state_get("ksharrays").unwrap_or(false);
2704 let s = if ksh && s >= 0 { s + 1 } else { s };
2705 let e = if ksh && e >= 0 { e + 1 } else { e };
2706 let sliced = getarrvalue(&arr, s, e);
2707 // (@) flag in surrounding chain overrides DQ-join
2708 // — always splat to Value::Array so the caller's
2709 // (@)-aware splat path emits each element as its
2710 // own word.
2711 if !force_array && (exec.in_dq_context > 0 || dq_compile) {
2712 let ifs_first = exec
2713 .get_variable("IFS")
2714 .chars()
2715 .next()
2716 .unwrap_or(' ')
2717 .to_string();
2718 return Value::str(sliced.join(&ifs_first));
2719 }
2720 return Value::Array(
2721 sliced.into_iter().map(Value::str).collect(),
2722 );
2723 }
2724 }
2725
2726 // Single index — try literal int first (fast), then fall
2727 // back to arithmetic eval which handles bare variable
2728 // names (`arr[i]`), expressions (`arr[i+1]`), etc.
2729 // KSH_ARRAYS: 0-based, so a 0 means first element and
2730 // valid indices are 0..len-1. Without this, `setopt
2731 // ksh_arrays; a[0]` returned empty (treating 0 as
2732 // "before first" per the standard 1-based path).
2733 let i = match idx.parse::<i64>() {
2734 Ok(i) => i,
2735 Err(_) => crate::ported::math::mathevali(&crate::ported::subst::singsub(&idx)).unwrap_or(0),
2736 };
2737 let len = arr.len() as i64;
2738 let ksh = crate::ported::options::opt_state_get("ksharrays").unwrap_or(false);
2739 let resolved = if ksh {
2740 if i < 0 {
2741 let off = len + i;
2742 if off < 0 {
2743 return Value::str("");
2744 }
2745 off as usize
2746 } else if i >= len {
2747 return Value::str("");
2748 } else {
2749 i as usize
2750 }
2751 } else if i > 0 {
2752 (i - 1) as usize
2753 } else if i < 0 {
2754 let off = len + i;
2755 if off < 0 {
2756 return Value::str("");
2757 }
2758 off as usize
2759 } else {
2760 return Value::str("");
2761 };
2762 Value::str(arr.get(resolved).cloned().unwrap_or_default())
2763 }
2764 })
2765 });
2766
2767 // `${(flags)name}` — apply zsh parameter flags. See BUILTIN_PARAM_FLAG
2768 // doc comment for the supported flag set. Algorithm: load `name` as a
2769 // current-value (scalar from variables/env, array from arrays, or assoc
2770 // from assoc_arrays), then walk `flags` char-by-char applying each
2771 // transformation. Final state is either Value::str or Value::Array
2772 // depending on the last flag.
2773 // Bridge entry that preserves array shape — see the const's doc.
2774 // Pops [content] (the brace body without the outer ${...}) and
2775 // returns Value::Array of per-element words.
2776 //WARNING FAKE AND MUST BE DELETED
2777 vm.register_builtin(BUILTIN_BRIDGE_BRACE_ARRAY, |vm, _argc| {
2778 // Inner body of `${(...)...}` (already stripped of `${`/`}` by
2779 // the caller). Re-wrap and route through subst.rs's paramsubst
2780 // so the flag-loop + per-operator array semantics
2781 // (e.g. `(M)arr:#pat`) execute properly. Earlier this returned
2782 // the body verbatim, which is why `${(M)arr:#pat}` printed as
2783 // literal text.
2784 let body = vm.pop().to_str();
2785 let full = format!("${{{}}}", body);
2786 let result = with_executor(|exec| {
2787 let mut ret_flags: i32 = 0;
2788 let (_full_str, _new_pos, nodes) = crate::ported::subst::paramsubst(
2789 &full,
2790 0,
2791 false,
2792 0i32,
2793 &mut ret_flags,
2794 );
2795 // c:Src/subst.c errflag bail — propagate to caller's
2796 // exit status the way `subst_state_commit_to_executor`
2797 // used to.
2798 if crate::ported::utils::errflag.load(std::sync::atomic::Ordering::Relaxed) != 0 {
2799 exec.set_last_status(1);
2800 }
2801 nodes
2802 });
2803 if result.is_empty() {
2804 return fusevm::Value::Array(Vec::new());
2805 }
2806 if result.len() == 1 {
2807 return fusevm::Value::str(result.into_iter().next().unwrap());
2808 }
2809 fusevm::Value::Array(result.into_iter().map(fusevm::Value::str).collect())
2810 });
2811
2812 vm.register_builtin(BUILTIN_PARAM_FLAG, |vm, _argc| {
2813 let mut flags = vm.pop().to_str();
2814 let name = vm.pop().to_str();
2815
2816 // Compile path tags DQ-wrapped expressions with a leading
2817 // `\u{02}` sentinel. In DQ context, array-only flags are
2818 // no-ops per zsh: `(o)`/`(O)`/`(n)`/`(i)`/`(M)`/`(u)` only
2819 // fire in array context. Strip those flag chars before
2820 // processing so the join-as-scalar path returns the original
2821 // element order.
2822 let dq_compile = flags.starts_with('\u{02}');
2823 if dq_compile {
2824 flags = flags[1..].to_string();
2825 }
2826 // `\u{03}` sentinel = the original name had `[@]`/`[*]` suffix.
2827 // The compile path strips the suffix from name (fast-path
2828 // requires identifier-only), but encodes the splice context
2829 // through this sentinel so DQ flag-stripping still respects it.
2830 let had_at_subscript = flags.starts_with('\u{03}');
2831 if had_at_subscript {
2832 flags = flags[1..].to_string();
2833 }
2834 // `\u{04}` sentinel = scalar-assignment context (compile-time
2835 // detected via `scalar_assign_depth`). Direct port of zsh's
2836 // PREFORK_SINGLE bit (Src/exec.c::addvars line 2546). Strip
2837 // the sentinel and remember it for the split-flag gate
2838 // below.
2839 let ssub_compile = flags.starts_with('\u{04}');
2840 if ssub_compile {
2841 flags = flags[1..].to_string();
2842 }
2843 let dq_runtime = with_executor(|exec| exec.in_dq_context > 0);
2844 // PREFORK_SINGLE equivalent — set when the BUILTIN_PARAM_FLAG
2845 // is being evaluated as the RHS of a scalar assignment.
2846 // Direct port of Src/subst.c:1759 `int ssub = (pf_flags &
2847 // PREFORK_SINGLE)`. Per Src/subst.c:3902 `force_split = !ssub
2848 // && (spbreak || spsep)` — when ssub, the force-split path
2849 // is gated off, so split flags `(f)` / `(s:STR:)` / `(0)` /
2850 // `(z)` produce the original scalar verbatim. Consulted at
2851 // each split flag's effect site below (the flag char itself
2852 // is not removed; instead the split is skipped).
2853 let ssub_runtime = ssub_compile
2854 || with_executor(|exec| exec.in_scalar_assign > 0);
2855 // `[@]` / `[*]` subscript on the name overrides the DQ
2856 // strip — explicit `[@]` marks the array as splice-
2857 // expanded so array-only flags (`o`/`O`/`n`/`i`/`u`)
2858 // still fire on the per-element list. Direct port of
2859 // zsh's subst.c nojoin/spbreak path. Without this,
2860 // `"${(o)a[@]}"` skipped the sort in DQ.
2861 // The explicit `@` flag is also an array-context marker — zsh
2862 // treats `${(@o)a}` same as `${(o)a[@]}` (both keep array-only
2863 // sort flags active in DQ). Without checking flags too, the DQ
2864 // strip dropped `o` for the bare-name `(@o)` case.
2865 let has_at_subscript = had_at_subscript
2866 || name.ends_with("[@]")
2867 || name.ends_with("[*]")
2868 || flags.contains('@');
2869 if (dq_compile || dq_runtime) && !has_at_subscript {
2870 // Strip array-only flag CHARS (sort/unique/index variants)
2871 // from the flag chain — but only when they appear as
2872 // bare flag chars, not as part of a flag-arg like
2873 // `(r:NAME::pad:)` where NAME may contain `n`/`o`/etc.
2874 // Direct port of zsh's nojoin gating in Src/subst.c:1813
2875 // which gates these flags off in DQ context. The C source
2876 // walks the flag chain as a state machine; we mirror that
2877 // by tracking arg-region depth: when we hit `(j:`, `(s:`,
2878 // `(l:`, `(r:` etc., switch into "in-arg" mode and copy
2879 // chars verbatim until the closing delim. Without this
2880 // careful skip, `(r:hlen:: :)` lost the `n` inside the
2881 // identifier, so width parsing returned a truncated name.
2882 let bytes = flags.as_bytes();
2883 let mut out = String::with_capacity(bytes.len());
2884 let mut i = 0;
2885 while i < bytes.len() {
2886 let b = bytes[i] as char;
2887 // Flag chars that take a delimited argument:
2888 // `j:STR:` join, `s:STR:` split, `l:N::pad:`,
2889 // `r:N::pad:`, `Z:STR:`, `g:STR:`. The arg is
2890 // bracket-delimited by the next char.
2891 if matches!(b, 'j' | 's' | 'l' | 'r' | 'Z' | 'g')
2892 && i + 1 < bytes.len()
2893 && !(bytes[i + 1] as char).is_ascii_alphanumeric()
2894 && bytes[i + 1] != b'_'
2895 {
2896 let delim_open = bytes[i + 1] as char;
2897 let delim_close = match delim_open {
2898 '[' => ']',
2899 '{' => '}',
2900 '(' => ')',
2901 '<' => '>',
2902 c => c,
2903 };
2904 out.push(b);
2905 out.push(delim_open);
2906 i += 2;
2907 // For `l:N::pad:` and `r:N::pad:`, the format has
2908 // TWO arg sections: `:N:` then `:pad:`. Walk
2909 // through both, plus any further sections until
2910 // we run out of immediate-`delim_close+delim_open`
2911 // pairs. This matches zsh subst.c get_strarg
2912 // which is called in a loop.
2913 loop {
2914 while i < bytes.len() && bytes[i] as char != delim_close {
2915 out.push(bytes[i] as char);
2916 i += 1;
2917 }
2918 if i < bytes.len() {
2919 out.push(delim_close);
2920 i += 1;
2921 }
2922 // Continue if the next char is the same
2923 // open-delim (another arg section).
2924 if i < bytes.len() && bytes[i] as char == delim_open {
2925 out.push(delim_open);
2926 i += 1;
2927 continue;
2928 }
2929 break;
2930 }
2931 continue;
2932 }
2933 if matches!(b, 'o' | 'O' | 'n' | 'i' | 'u') {
2934 i += 1;
2935 continue;
2936 }
2937 out.push(b);
2938 i += 1;
2939 }
2940 flags = out;
2941 }
2942
2943 // Initial state: prefer assoc → array → scalar lookup. If `P` flag
2944 // is in the chain, we'll re-fetch with the indirected name later.
// Working value carried through the flag walk below: each flag handler
// consumes the current `St` and produces the next one, switching between
// the two shapes as needed (e.g. a split turns `S` into `A`, a join turns
// `A` back into `S`).
enum St {
    // Scalar shape: a single string.
    S(String),
    // Array shape: one string per element.
    A(Vec<String>),
}
2949
2950 // Detect (k) flag PRESENCE early — we need to seed
2951 // magic-assoc lookups with the key set before the flag
2952 // walker re-orders things. Use `flags` (the post-sentinel-
2953 // strip string) since the `chars` Vec is built later.
2954 let want_keys = flags.contains('k');
2955 let want_values = flags.contains('v');
2956
2957 // Literal-string operand sentinel: `${(flags)"text"}` compiles to a
2958 // name prefixed with `\u{01}` followed by the literal value. Skip
2959 // the lookup and seed state with the literal scalar.
2960 let mut state = if let Some(literal) = name.strip_prefix('\u{01}') {
2961 St::S(literal.to_string())
2962 } else {
2963 with_executor(|exec| {
2964 if let Some(map) = exec.assoc(&name) {
2965 // For assoc, default to value list (no flag) — `(k)`/`(v)`
2966 // override.
2967 St::A(map.values().cloned().collect())
2968 } else if let Some(arr) = exec.array(&name) {
2969 St::A(arr)
2970 } else if want_keys {
2971 // `${(k)<magic-assoc>}` — names like `aliases`,
2972 // `functions`, `options`, `commands`, `terminfo`,
2973 // `errnos` etc. are not in `assoc_arrays` (they're
2974 // synthesized via magic-getfn). When the flag set
2975 // includes `k`, return the SCANFN-equivalent key
2976 // list. Direct port of paramsubst's per-special
2977 // scanfn dispatch (Src/Modules/parameter.c +
2978 // system.c + terminfo.c et al.).
2979 if let Some(keys) =
2980 crate::exec::scan_magic_assoc_keys(&name)
2981 {
2982 St::A(keys)
2983 } else {
2984 St::S(exec.get_variable(&name))
2985 }
2986 } else if want_values {
2987 // `${(v)<magic-assoc>}` — values for the same
2988 // magic-getfn list above. zinit/p10k both use
2989 // `${(v)aliases}`-style introspection; the
2990 // earlier (k) branch covered the keys but the
2991 // (v) symmetry was missing, so plugin code that
2992 // looped over alias bodies got an empty list.
2993 if let Some(keys) =
2994 crate::exec::scan_magic_assoc_keys(&name)
2995 {
2996 let values: Vec<String> = keys
2997 .iter()
2998 .map(|k| exec.get_special_array_value(&name, k).unwrap_or_default())
2999 .collect();
3000 St::A(values)
3001 } else {
3002 St::S(exec.get_variable(&name))
3003 }
3004 } else {
3005 St::S(exec.get_variable(&name))
3006 }
3007 })
3008 };
3009
3010 let chars: Vec<char> = flags.chars().collect();
3011 // Pre-scan for `(P)` — indirect: zsh's bin_zmodload-style
3012 // P flag is special. It applies BEFORE all per-char
3013 // transforms regardless of position in the flag string,
3014 // because zsh's paramsubst sets `aspar` early and the
3015 // INITIAL value is the indirected lookup. Without this
3016 // pre-resolve, `${(UP)ref}` first uppercases ref's value
3017 // ("target" → "TARGET") then tries to indirect on "TARGET"
3018 // which is unset, returning empty. zsh produces "HELLO"
3019 // because it indirects FIRST (ref→target, lookup target =
3020 // "hello") then uppercases.
3021 let want_indirect = chars.iter().any(|&c| c == 'P');
3022 // `(Pt)` is a special pairing — type-of-the-target, not
3023 // value-of-the-target. Direct port of Src/subst.c:2807-2854
3024 // `wantt` arm: zsh's `wantt` runs AFTER `aspar` has resolved
3025 // the pm pointer to the target's Param struct, then reads
3026 // `pm->node.flags` for type. Doing the value pre-walker here
3027 // discards the target name and the (t) handler ends up
3028 // introspecting the original pointer ("n" → scalar). Skip
3029 // the value-walker for (Pt); the (t) handler resolves the
3030 // target name itself via `target_for_type` below.
3031 let want_type = chars.iter().any(|&c| c == 't');
3032 let pt_combo = want_indirect && want_type;
3033 if want_indirect && !pt_combo && !matches!(state, St::S(ref s) if s.is_empty()) {
3034 // The state at this point holds the (P) TARGET reference,
3035 // not the original pointer name — the param-flag dispatch
3036 // upstream initialized state to `exec.get_variable(name)`.
3037 // Resolve that target. Two shapes:
3038 // - bare name: `${(P)n}` with `n=foo` → state="foo",
3039 // look up `foo` directly.
3040 // - subscripted name: `${(P)n2}` with `n2="arr[-1]"` →
3041 // state="arr[-1]", split into base="arr" + sub="-1"
3042 // and route through expand_string. Direct port of
3043 // Src/subst.c:2799-2806 where `fetchvalue(&vbuf, &ov, …)`
3044 // parses both name and any trailing `[…]` subscript
3045 // from the same input pointer. Without this split,
3046 // a subscripted target was looked up as a literal
3047 // parameter named "arr[-1]" (always unset → empty).
3048 fn resolve_indirect_target(target: &str, exec: &mut ShellExecutor) -> St {
3049 let (base, sub) = match target.find('[') {
3050 Some(b) if target.ends_with(']') => {
3051 let n = &target[..b];
3052 let s = &target[b + 1..target.len() - 1];
3053 (n.to_string(), Some(s.to_string()))
3054 }
3055 _ => (target.to_string(), None),
3056 };
3057 // Bare-name path.
3058 if sub.is_none() {
3059 if let Some(arr) = exec.array(&base) {
3060 return St::A(arr);
3061 }
3062 return St::S(exec.get_variable(&base));
3063 }
3064 let sub_str = sub.unwrap();
3065 // Assoc lookup: `${(P)"map[key]"}` — single value for
3066 // the given key.
3067 if let Some(m) = exec.assoc(&base) {
3068 return St::S(m.get(&sub_str).cloned().unwrap_or_default());
3069 }
3070 // Indexed-array subscript. Direct port of getindex()
3071 // (Src/params.c) handling for negative indices and
3072 // `lo,hi` slice. expand_string() can't be used here —
3073 // it routes the subscripted form through compile-time
3074 // paths that re-fetch the WHOLE array on the bridge
3075 // back from subst_port. Apply the subscript here
3076 // directly.
3077 if let Some(arr) = exec.array(&base) {
3078 let n = arr.len() as i64;
3079 let to_zero = |i: i64| -> i64 {
3080 if i > 0 {
3081 i - 1
3082 } else if i < 0 {
3083 n + i
3084 } else {
3085 0
3086 }
3087 };
3088 if let Some((lo_s, hi_s)) = sub_str.split_once(',') {
3089 let lo = lo_s.trim().parse::<i64>().unwrap_or(1);
3090 let hi = hi_s.trim().parse::<i64>().unwrap_or(n);
3091 let lo_i = to_zero(lo).max(0);
3092 let hi_i = to_zero(hi);
3093 if hi_i < lo_i || lo_i >= n {
3094 return St::A(Vec::new());
3095 }
3096 let hi_clamped = (hi_i + 1).min(n) as usize;
3097 return St::A(arr[lo_i as usize..hi_clamped].to_vec());
3098 }
3099 if sub_str == "@" || sub_str == "*" {
3100 return St::A(arr);
3101 }
3102 if let Ok(idx) = sub_str.parse::<i64>() {
3103 let real = to_zero(idx);
3104 if real < 0 || real >= n {
3105 return St::S(String::new());
3106 }
3107 return St::S(arr[real as usize].clone());
3108 }
3109 }
3110 // Fallback: scalar with subscript = char-range.
3111 let val = exec.get_variable(&base);
3112 let chars: Vec<char> = val.chars().collect();
3113 let n = chars.len() as i64;
3114 let to_zero = |i: i64| -> i64 {
3115 if i > 0 {
3116 i - 1
3117 } else if i < 0 {
3118 n + i
3119 } else {
3120 0
3121 }
3122 };
3123 if let Some((lo_s, hi_s)) = sub_str.split_once(',') {
3124 let lo = lo_s.trim().parse::<i64>().unwrap_or(1);
3125 let hi = hi_s.trim().parse::<i64>().unwrap_or(n);
3126 let lo_i = to_zero(lo).max(0);
3127 let hi_i = to_zero(hi);
3128 if hi_i < lo_i || lo_i >= n {
3129 return St::S(String::new());
3130 }
3131 let hi_clamped = (hi_i + 1).min(n) as usize;
3132 return St::S(chars[lo_i as usize..hi_clamped].iter().collect());
3133 }
3134 if let Ok(idx) = sub_str.parse::<i64>() {
3135 let real = to_zero(idx);
3136 if real < 0 || real >= n {
3137 return St::S(String::new());
3138 }
3139 return St::S(chars[real as usize].to_string());
3140 }
3141 St::S(String::new())
3142 }
3143 state = match state {
3144 St::S(name) => with_executor(|exec| resolve_indirect_target(&name, exec)),
3145 St::A(names) => with_executor(|exec| {
3146 let resolved: Vec<String> = names
3147 .into_iter()
3148 .map(|n| exec.get_variable(&n))
3149 .collect();
3150 St::A(resolved)
3151 }),
3152 };
3153 }
3154 // Pre-scan for `(p)` — print-style escape interpretation for
3155 // any subsequent `(s::)`, `(j::)`, `(l::)`, `(r::)` argument
3156 // strings. Direct port of src/zsh/Src/subst.c:2381-2382 which
3157 // sets `escapes = 1` and then `untok_and_escape` performs the
3158 // print-escape on those flag args. Order in zsh: only flags
3159 // that appear AFTER `p` get their args escaped; we approximate
3160 // by detecting `p` at the start of the flag string. The exact
3161 // C semantics rely on left-to-right state, but `(ps:..:)` is
3162 // by far the dominant idiom and a position-aware pre-scan is
3163 // the simplest faithful match.
3164 let print_escapes = chars
3165 .iter()
3166 .take_while(|&&c| c != 's' && c != 'j' && c != 'l' && c != 'r')
3167 .any(|&c| c == 'p');
3168 // print_escape_str — interpret \n, \t, \r, \\, \xNN, \NNN
3169 // (octal) per zsh's untok_and_escape behavior. Returns the
3170 // decoded string. Used inline below when print_escapes is set.
// Decode print-style backslash escapes in a flag argument string.
//
// Recognised escapes:
// - `\n` `\t` `\r` `\\` `\'` `\"` `\a` `\b` `\e` / `\E` `\f` `\v`
// - `\xH` / `\xHH` — one or two hex digits
// - `\N` / `\NN` / `\NNN` — one to three octal digits, a leading `0`
//   included (`\0` is NUL, `\033` is ESC)
//
// Everything else is left alone: an unrecognised escape (including `\8`
// and `\9`, which are not octal) keeps both the backslash and the
// character, and a lone trailing backslash is kept. `\x` followed by no
// hex digit produces nothing.
fn print_escape_str(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars().peekable();
    while let Some(c) = rest.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match rest.next() {
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            Some('\\') => out.push('\\'),
            Some('\'') => out.push('\''),
            Some('"') => out.push('"'),
            Some('a') => out.push('\x07'),
            Some('b') => out.push('\x08'),
            Some('e') | Some('E') => out.push('\x1b'),
            Some('f') => out.push('\x0c'),
            Some('v') => out.push('\x0b'),
            Some('x') => {
                // Up to two hex digits; stop at the first non-hex char.
                let mut code = 0u32;
                let mut digits = 0;
                while digits < 2 {
                    let Some(v) = rest.peek().and_then(|h| h.to_digit(16)) else {
                        break;
                    };
                    code = code * 16 + v;
                    rest.next();
                    digits += 1;
                }
                if digits > 0 {
                    if let Some(ch) = char::from_u32(code) {
                        out.push(ch);
                    }
                }
            }
            // Octal: the digit after the backslash plus up to two more.
            // There is deliberately no separate `\0` arm — it would
            // swallow the leading zero of `\033`-style escapes and emit
            // NUL followed by the literal digits.
            Some(d) if d.is_digit(8) => {
                let mut code = d.to_digit(8).unwrap_or(0);
                for _ in 0..2 {
                    let Some(v) = rest.peek().and_then(|o| o.to_digit(8)) else {
                        break;
                    };
                    code = code * 8 + v;
                    rest.next();
                }
                if let Some(ch) = char::from_u32(code) {
                    out.push(ch);
                }
            }
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            None => out.push('\\'),
        }
    }
    out
}
3235 let mut i = 0;
3236 while i < chars.len() {
3237 let c = chars[i];
3238 i += 1;
3239 match c {
3240 '#' => {
3241 // `(#)` — evaluate each element as an arithmetic
3242 // expression, then output the character with that
3243 // code point. Direct port of substevalchar in
3244 // src/zsh/Src/subst.c:1490-1520. zsh's flow:
3245 // ires = mathevali(ptr); // line 1497
3246 // if (errflag) return ""; // 1499-1502
3247 // if (ires < 0) zerr("character not in range"); // 1504-1506
3248 // if MULTIBYTE && ires>127: ucs4tomb // 1508-1511
3249 // else: single-byte sprintf // 1514-1518
3250 let to_char = |s: &str| -> String {
3251 let n = with_executor(|exec| crate::ported::math::mathevali(&crate::ported::subst::singsub(s)).unwrap_or(0));
3252 // zsh subst.c:1504-1518 — negative WARNS but
3253 // STILL outputs the low byte (truncated cast
3254 // through `(int)ires` + `%c` sprintf at line
3255 // 1514-1517). The zerr at line 1505 just sets
3256 // errflag without aborting the function. We
3257 // skip the error message (matches zsh's
3258 // observed silent behavior under -f -c) and
3259 // mirror the low-byte fallback.
3260 if !(0..=0x10FFFF).contains(&n) {
3261 // Truncated cast: low 8 bits as Latin-1
3262 // byte (zsh's `%c` sprintf on `(int)ires`).
3263 let byte = (n as i32 as u32) & 0xFF;
3264 // Encode the byte as raw — for high bytes
3265 // (0x80-0xFF), wrap with the same UTF-8
3266 // promotion zsh's pastebuf() uses.
3267 return char::from_u32(byte)
3268 .map(|c| c.to_string())
3269 .unwrap_or_default();
3270 }
3271 // Valid Unicode scalar — char::from_u32 returns
3272 // the right multi-byte UTF-8 sequence in Rust.
3273 char::from_u32(n as u32)
3274 .map(|c| c.to_string())
3275 .unwrap_or_default()
3276 };
3277 state = match state {
3278 St::S(s) => St::S(to_char(&s)),
3279 St::A(a) => St::A(a.into_iter().map(|s| to_char(&s)).collect()),
3280 };
3281 }
3282 'L' => {
3283 state = match state {
3284 St::S(s) => St::S(s.to_lowercase()),
3285 St::A(a) => St::A(a.into_iter().map(|s| s.to_lowercase()).collect()),
3286 };
3287 }
3288 'U' => {
3289 state = match state {
3290 St::S(s) => St::S(s.to_uppercase()),
3291 St::A(a) => St::A(a.into_iter().map(|s| s.to_uppercase()).collect()),
3292 };
3293 }
3294 'l' | 'r' => {
3295 // (l:N:) — left-pad to width N (truncate if longer).
3296 // (l:N::fill:) — pad with `fill` instead of space.
3297 // (r:N:) — right-pad to width N (truncate if longer).
3298 // Width must be followed by `:` (or `(` etc.) delim.
3299 let pad_left = c == 'l';
3300 if i >= chars.len() || !ZshrsHost::is_zsh_flag_delim(chars[i]) {
3301 // Bare `l`/`r` without delim — skip (only the
3302 // padded form takes a width).
3303 continue;
3304 }
3305 let delim = chars[i];
3306 i += 1;
3307 let mut width_str = String::new();
3308 while i < chars.len() && chars[i] != delim {
3309 width_str.push(chars[i]);
3310 i += 1;
3311 }
3312 if i < chars.len() {
3313 i += 1; // skip closing delim
3314 }
3315 // Width may be a literal number, `$VAR`, or a bare
3316 // identifier (zsh evaluates `(r:hlen:: :)` by
3317 // running `mathevali("hlen")` which reads the
3318 // parameter table). Direct port of Src/subst.c
3319 // `get_intarg()` (line 1428) which does
3320 // `parsestr` → `singsub` → `mathevali`. Fast path:
3321 // if the arg parses as a literal usize, use it
3322 // directly. Otherwise expand `$`-references and
3323 // route through evaluate_arithmetic so bare
3324 // identifiers resolve to their variable values.
3325 let width: usize = if let Ok(n) = width_str.parse() {
3326 n
3327 } else {
3328 let arith_str = crate::ported::subst::arithsubst(&width_str, "", "");
3329 arith_str.parse::<i64>().map(|v| v.unsigned_abs() as usize).unwrap_or(0)
3330 };
3331 // Optional `:fill:` after the width.
3332 let mut fill = String::from(" ");
3333 if i < chars.len() && ZshrsHost::is_zsh_flag_delim(chars[i]) {
3334 let d2 = chars[i];
3335 i += 1;
3336 let mut f = String::new();
3337 while i < chars.len() && chars[i] != d2 {
3338 f.push(chars[i]);
3339 i += 1;
3340 }
3341 if i < chars.len() {
3342 i += 1; // skip closing delim
3343 }
3344 if !f.is_empty() {
3345 fill = if print_escapes {
3346 print_escape_str(&f)
3347 } else {
3348 f
3349 };
3350 }
3351 }
3352 let pad_one = |s: String| -> String {
3353 let len = s.chars().count();
3354 if len >= width {
3355 return s.chars().take(width).collect();
3356 }
3357 let need = width - len;
3358 let mut filler = String::new();
3359 while filler.chars().count() < need {
3360 filler.push_str(&fill);
3361 }
3362 let filler: String = filler.chars().take(need).collect();
3363 if pad_left {
3364 format!("{}{}", filler, s)
3365 } else {
3366 format!("{}{}", s, filler)
3367 }
3368 };
3369 state = match state {
3370 St::S(s) => St::S(pad_one(s)),
3371 St::A(a) => St::A(a.into_iter().map(pad_one).collect()),
3372 };
3373 }
3374 'j' | 's' => {
3375 // zsh syntax: `(j:sep:)` and `(s:sep:)` use the char
3376 // following the flag as the delimiter. The delimiter must
3377 // be a non-alphanumeric, non-underscore char so subsequent
3378 // flags (alphabetic) aren't accidentally swallowed —
3379 // `(jL)` should be `j` (no delim, default IFS) followed
3380 // by `L`, not `j` with delim `L`. Recognized delim chars
3381 // mirror what zsh allows: punctuation only. zsh subst.c
3382 // get_strarg also accepts matched bracket pairs:
3383 // `[`/`]`, `{`/`}`, `(`/`)`, `<`/`>`.
3384 let mut sep = String::new();
3385 if i < chars.len() && ZshrsHost::is_zsh_flag_delim(chars[i]) {
3386 let delim = chars[i];
3387 let close = match delim {
3388 '[' => ']',
3389 '{' => '}',
3390 '(' => ')',
3391 '<' => '>',
3392 c => c,
3393 };
3394 i += 1;
3395 while i < chars.len() && chars[i] != close {
3396 sep.push(chars[i]);
3397 i += 1;
3398 }
3399 if i < chars.len() {
3400 i += 1; // skip closing delim
3401 }
3402 } else if c == 'j' {
3403 // `j` with no delim → join with space (IFS-default).
3404 sep = " ".to_string();
3405 }
3406 // `(p)` print-escape interpretation per
3407 // src/zsh/Src/subst.c:2381-2382 — `\n`, `\t`,
3408 // `\xNN`, `\NNN` (octal) etc. become the actual
3409 // characters in the separator. Additionally,
3410 // (p) enables \$VAR / \${VAR} / \$(cmd) /
3411 // \$((expr)) expansion in the separator string
3412 // (zsh's parsestr+singsub treatment of get_strarg
3413 // results when the (p) flag is present). Without
3414 // (p), these stay literal — confirmed via
3415 // /opt/homebrew/bin/zsh -fc.
3416 if print_escapes && !sep.is_empty() {
3417 sep = print_escape_str(&sep);
3418 if sep.contains('$') || sep.contains('`') {
3419 sep = crate::ported::subst::singsub(&sep);
3420 }
3421 }
3422 if c == 'j' {
3423 state = match state {
3424 St::A(a) => St::S(a.join(&sep)),
3425 St::S(s) => St::S(s),
3426 };
3427 } else {
3428 // (s) splits both scalars and array elements per
3429 // zsh semantics. `(@s:,:)` runs `@` first which
3430 // wraps a scalar in a 1-elem array; `s` must
3431 // still split that element. Same goes for true
3432 // arrays — flat-map split each element.
3433 //
3434 // Empty-field handling — verified against zsh's
3435 // C source (utils.c sepsplit + subst.c around
3436 // line 3273). The actual rule is NOT "drop all
3437 // empties" but more nuanced:
3438 // - Boundary empties (leading or trailing
3439 // run of separators) collapse to ONE empty
3440 // each, regardless of how many separators.
3441 // - Middle empties (consecutive separators
3442 // between non-empties) drop ENTIRELY.
3443 // - `(@)` flag preserves all empties verbatim.
3444 // Examples (no @):
3445 // "a,,b,,c" → [a,b,c] (3)
3446 // ",a,b" → ["",a,b] (3)
3447 // "a,b," → [a,b,""] (3)
3448 // ",,a,,b,," → ["",a,b,""] (4)
3449 // "a,,,b" → [a,b] (2, 3 middle empties)
3450 let keep_empty = chars.contains(&'@');
3451 let collapse = |s: &str, sep: &str| -> Vec<String> {
3452 let parts: Vec<String> = s.split(sep).map(String::from).collect();
3453 if keep_empty {
3454 return parts;
3455 }
3456 // Find first and last non-empty positions.
3457 let first_nonempty = parts.iter().position(|p| !p.is_empty());
3458 let last_nonempty = parts.iter().rposition(|p| !p.is_empty());
3459 match (first_nonempty, last_nonempty) {
3460 (None, _) => {
3461 // All-empty input. Collapse to a
3462 // single empty if input had any
3463 // separator (parts.len() > 1) and
3464 // therefore had a "boundary";
3465 // empty input → empty output.
3466 if parts.len() > 1 {
3467 vec![String::new()]
3468 } else {
3469 Vec::new()
3470 }
3471 }
3472 (Some(fi), Some(li)) => {
3473 let mut out: Vec<String> = Vec::new();
3474 if fi > 0 {
3475 out.push(String::new());
3476 }
3477 // Push only non-empty middles; drop
3478 // every internal empty.
3479 for p in &parts[fi..=li] {
3480 if !p.is_empty() {
3481 out.push(p.clone());
3482 }
3483 }
3484 if li < parts.len() - 1 {
3485 out.push(String::new());
3486 }
3487 out
3488 }
3489 _ => parts,
3490 }
3491 };
3492 state = match state {
3493 St::S(s) if sep.is_empty() => {
3494 St::A(s.chars().map(|c| c.to_string()).collect())
3495 }
3496 St::S(s) => St::A(collapse(&s, sep.as_str())),
3497 St::A(a) => {
3498 let mut out: Vec<String> = Vec::with_capacity(a.len());
3499 for elem in a {
3500 if sep.is_empty() {
3501 for c in elem.chars() {
3502 out.push(c.to_string());
3503 }
3504 } else {
3505 out.extend(collapse(&elem, sep.as_str()));
3506 }
3507 }
3508 St::A(out)
3509 }
3510 };
3511 }
3512 }
3513 'f' => {
3514 // Suppress the split entirely in scalar-assignment
3515 // context per Src/subst.c:3902 ssub gate. The
3516 // value passes through unchanged (preserves
3517 // original `\n` separators in `y="${(f)x}"`).
3518 if !ssub_runtime {
3519 state = match state {
3520 St::S(s) => St::A(s.split('\n').map(String::from).collect()),
3521 St::A(a) => {
3522 // Same flat-map rule as (s): split each element.
3523 let mut out: Vec<String> = Vec::with_capacity(a.len());
3524 for elem in a {
3525 for line in elem.split('\n') {
3526 out.push(line.to_string());
3527 }
3528 }
3529 St::A(out)
3530 }
3531 };
3532 }
3533 }
3534 '0' => {
3535 // `(0)` — split on NUL byte. Direct port of
3536 // src/zsh/Src/subst.c:2292-2297 which sets `spsep`
3537 // to a meta-encoded NUL. We split on the literal
3538 // `\0` character. Same flat-map behaviour as `(f)`.
3539 // Same ssub gate.
3540 if !ssub_runtime { state = match state {
3541 St::S(s) => St::A(s.split('\0').map(String::from).collect()),
3542 St::A(a) => {
3543 let mut out: Vec<String> = Vec::with_capacity(a.len());
3544 for elem in a {
3545 for piece in elem.split('\0') {
3546 out.push(piece.to_string());
3547 }
3548 }
3549 St::A(out)
3550 }
3551 }; }
3552 }
3553 'F' => {
3554 // (F) — join array elements with newlines (mirror
3555 // of (j:\n:) but as a one-letter shorthand).
3556 state = match state {
3557 St::A(a) => St::S(a.join("\n")),
3558 s => s,
3559 };
3560 }
3561 'Q' => {
3562 // (Q) — full shell-quoting reversal. Direct port of
3563 // Src/utils.c::dequotestring which scans the entire
3564 // string, handling SQ-spans (`'…'`), DQ-spans
3565 // (`"…"`) with backslash escapes, and standalone
3566 // `\X` escapes — NOT just outer-bslashquote strip. The
3567 // canonical roundtrip is `(qq)` → `(Q)` for strings
3568 // containing single quotes: `(qq)` of `a'b` produces
3569 // `'a'\''b'` and `(Q)` must reverse the four
3570 // close/escape/open transitions to recover `a'b`.
3571 // Earlier outer-bslashquote-strip left `a'\''b` literal.
3572 let dequote = |s: &str| -> String {
3573 let mut out = String::with_capacity(s.len());
3574 let mut chars = s.chars().peekable();
3575 while let Some(c) = chars.next() {
3576 match c {
3577 '\\' => {
3578 if let Some(&nx) = chars.peek() {
3579 out.push(nx);
3580 chars.next();
3581 }
3582 }
3583 '\'' => {
3584 while let Some(&inner) = chars.peek() {
3585 chars.next();
3586 if inner == '\'' {
3587 break;
3588 }
3589 out.push(inner);
3590 }
3591 }
3592 '"' => {
3593 while let Some(&inner) = chars.peek() {
3594 chars.next();
3595 if inner == '"' {
3596 break;
3597 }
3598 if inner == '\\' {
3599 if let Some(&esc) = chars.peek() {
3600 out.push(esc);
3601 chars.next();
3602 continue;
3603 }
3604 }
3605 out.push(inner);
3606 }
3607 }
3608 _ => out.push(c),
3609 }
3610 }
3611 out
3612 };
3613 state = match state {
3614 St::S(s) => St::S(dequote(&s)),
3615 St::A(a) => St::A(a.into_iter().map(|s| dequote(&s)).collect()),
3616 };
3617 }
3618 'z' => {
3619 // (z) — split by shell-token rules: whitespace
3620 // boundaries, BUT also split out shell metacharacters
3621 // like `;`, `&`, `|`, `(`, `)`, `<`, `>` as their
3622 // own tokens. Honors single/double quotes (treat
3623 // contents as one token, strip outer quotes from
3624 // the result). Matches zsh's `(z)` flag.
3625 state = match state {
3626 St::S(s) => St::A(bufferwords_z(&s)),
3627 St::A(a) => St::A(a),
3628 };
3629 }
3630 'w' => {
3631 // (w) — count words; in the array sense, just split
3632 // on whitespace and let downstream consumers count.
3633 state = match state {
3634 St::S(s) => St::A(s.split_whitespace().map(String::from).collect()),
3635 St::A(a) => St::A(a),
3636 };
3637 }
3638 'o' | 'O' => {
3639 // Optional sub-flag: `n` numeric, `i` case-insensitive,
3640 // `a` array-order (i.e. don't sort, just reverse for O).
3641 // Also detect `n`/`i` BEFORE the `o`/`O` (zsh's
3642 // `(no)` and `(io)` shapes — order-agnostic).
3643 let sub = chars.get(i).copied();
3644 let consume = matches!(sub, Some('n') | Some('i') | Some('a'));
3645 if consume {
3646 i += 1;
3647 }
3648 // Look back: was `n` or `i` already in the flags
3649 // string before this `o`? zsh treats `(no)` same
3650 // as `(on)` — numeric sort applied to the
3651 // ascending order. Only relevant if no inline sub
3652 // was found.
3653 let sub = if consume {
3654 sub
3655 } else {
3656 let prefix = &chars[..i.saturating_sub(1)];
3657 if prefix.contains(&'n') {
3658 Some('n')
3659 } else if prefix.contains(&'i') {
3660 Some('i')
3661 } else {
3662 None
3663 }
3664 };
3665 let consume = consume || matches!(sub, Some('n') | Some('i') | Some('a'));
3666 let descending = c == 'O';
3667 state = match state {
3668 St::A(mut a) => {
3669 match sub {
3670 Some('a') if consume => {
3671 if descending {
3672 a.reverse();
3673 }
3674 // ascending + array-order = no-op
3675 }
3676 Some('n') if consume => {
3677 // Natural sort: compare by chunks of
3678 // digits-vs-non-digits so "file10"
3679 // sorts after "file2".
3680 a.sort_by(|x, y| {
3681 let cmp = crate::extensions::stringsort::natural_cmp(x, y);
3682 if descending {
3683 cmp.reverse()
3684 } else {
3685 cmp
3686 }
3687 });
3688 }
3689 Some('i') if consume => {
3690 a.sort_by(|x, y| {
3691 let xl = x.to_lowercase();
3692 let yl = y.to_lowercase();
3693 if descending {
3694 yl.cmp(&xl)
3695 } else {
3696 xl.cmp(&yl)
3697 }
3698 });
3699 }
3700 _ => {
3701 if descending {
3702 a.sort_by(|x, y| y.cmp(x));
3703 } else {
3704 a.sort();
3705 }
3706 }
3707 }
3708 St::A(a)
3709 }
3710 s => s,
3711 };
3712 }
3713 'u' => {
3714 // Unique: preserve first occurrence, drop later dupes.
3715 state = match state {
3716 St::A(a) => {
3717 let mut seen = std::collections::HashSet::new();
3718 let unique: Vec<String> =
3719 a.into_iter().filter(|s| seen.insert(s.clone())).collect();
3720 St::A(unique)
3721 }
3722 s => s,
3723 };
3724 }
3725 'C' => {
3726 // `(C)` — capitalize. Direct port of
3727 // src/zsh/Src/hist.c:2239-2256 CASMOD_CAPS via
3728 // crate::ported::hist::casemodify. Treats any non-
3729 // alphanumeric (including punctuation, control
3730 // chars, NOT just whitespace) as a word boundary
3731 // and lowercases mid-word uppercase letters.
3732 state = match state {
3733 St::S(s) => {
3734 St::S(crate::ported::hist::casemodify(&s, crate::ported::hist::CASMOD_CAPS))
3735 }
3736 St::A(a) => St::A(
3737 a.into_iter()
3738 .map(|s| crate::ported::hist::casemodify(&s, crate::ported::hist::CASMOD_CAPS))
3739 .collect(),
3740 ),
3741 };
3742 }
3743 'V' => {
3744 // Make non-printable characters visible. zsh:
3745 // `^X` for control chars (X = char + 64); `\M-X`
3746 // for high-bit chars; backslash escapes for
3747 // common forms (\n, \t, \r). zshrs's separate
3748 // ZshParamFlag::Visible path implements this for
3749 // the multi-flag dispatcher, but the inline state
3750 // machine had no `V` arm so `${(V)x}` left
3751 // control chars raw.
3752 let visible = |s: &str| -> String {
3753 let mut out = String::with_capacity(s.len());
3754 for c in s.chars() {
3755 match c {
3756 '\n' => out.push_str("\\n"),
3757 '\t' => out.push_str("\\t"),
3758 '\r' => out.push_str("\\r"),
3759 c if c.is_control() => {
3760 out.push('^');
3761 out.push((c as u8 + 64) as char);
3762 }
3763 _ => out.push(c),
3764 }
3765 }
3766 out
3767 };
3768 state = match state {
3769 St::S(s) => St::S(visible(&s)),
3770 St::A(a) => St::A(a.into_iter().map(|s| visible(&s)).collect()),
3771 };
3772 }
3773 'D' => {
3774 // (D) named-directory substitution per
3775 // Src/subst.c:4155 (`mods & 1`) → substnamedir.
3776 // Replace $HOME prefix with `~` and any longer
3777 // named-dir match with `~name`. Per-element on
3778 // arrays, longest-prefix-first to avoid shallow
3779 // shadowing (a `~zpwr=/Users/wizard/zpwr`
3780 // override beats the bare `~=/Users/wizard`).
3781 let render_d = |s: &str| -> String {
3782 with_executor(|_exec| {
3783 let mut out = s.to_string();
3784 // First the longer named dirs.
3785 let mut entries: Vec<(String, std::path::PathBuf)> =
3786 crate::ported::hashnameddir::nameddirtab()
3787 .lock().ok()
3788 .map(|g| g.iter()
3789 .map(|(k, nd)| (k.clone(), std::path::PathBuf::from(&nd.dir)))
3790 .collect())
3791 .unwrap_or_default();
3792 entries.sort_by_key(|(_, p)| std::cmp::Reverse(p.as_os_str().len()));
3793 for (name, path) in &entries {
3794 let path_s = path.to_string_lossy();
3795 if !path_s.is_empty() && out.starts_with(path_s.as_ref()) {
3796 return format!(
3797 "~{}{}",
3798 name,
3799 &out[path_s.len()..]
3800 );
3801 }
3802 }
3803 // Then $HOME — only if no named-dir matched.
3804 if let Some(home) = crate::ported::params::getsparam("HOME") {
3805 if !home.is_empty() && out.starts_with(&home) {
3806 out = format!("~{}", &out[home.len()..]);
3807 }
3808 } else if let Ok(home) = std::env::var("HOME") {
3809 if !home.is_empty() && out.starts_with(&home) {
3810 out = format!("~{}", &out[home.len()..]);
3811 }
3812 }
3813 out
3814 })
3815 };
3816 state = match state {
3817 St::S(s) => St::S(render_d(&s)),
3818 St::A(a) => St::A(a.into_iter().map(|s| render_d(&s)).collect()),
3819 };
3820 }
3821 'P' => {
3822 // (P) was already applied as the pre-walker
3823 // initial-state transform — see `want_indirect`
3824 // above. The walker pass is a no-op for P.
3825 state = match state {
3826 St::S(s) => St::S(s),
3827 St::A(a) => St::A(a),
3828 };
3829 }
3830 '@' => {
3831 // Force array shape (scalar → 1-elem array).
3832 state = match state {
3833 St::S(s) => St::A(vec![s]),
3834 a => a,
3835 };
3836 }
3837 'k' => {
3838 // Keys of assoc. If immediately followed by 'v' (or
3839 // earlier state was already 'v'-set), interleave key/value
3840 // pairs (zsh's `(kv)` form). For regular arrays zsh
3841 // returns the values themselves (a quirk: docs say
3842 // "integer subscripts" but the actual implementation
3843 // returns array contents — verified against /bin/zsh).
3844 if i < chars.len() && chars[i] == 'v' {
3845 i += 1; // consume the 'v'
3846 let pairs = with_executor(|exec| {
3847 if let Some(m) = exec.assoc(&name) {
3848 let mut out = Vec::with_capacity(m.len() * 2);
3849 for (k, v) in m {
3850 out.push(k.clone());
3851 out.push(v.clone());
3852 }
3853 out
3854 } else if let Some(arr) = exec.array(&name) {
3855 arr
3856 } else {
3857 // Magic-assoc fallback for (kv): emit
3858 // alternating [key, value] pairs by
3859 // pairing magic_assoc_keys with
3860 // get_special_array_value lookups.
3861 if let Some(keys) = crate::exec::scan_magic_assoc_keys(&name) {
3862 let mut out = Vec::with_capacity(keys.len() * 2);
3863 for k in keys {
3864 let v = exec
3865 .get_special_array_value(&name, &k)
3866 .unwrap_or_default();
3867 out.push(k);
3868 out.push(v);
3869 }
3870 out
3871 } else {
3872 Vec::new()
3873 }
3874 }
3875 });
3876 state = St::A(pairs);
3877 } else {
3878 let keys = with_executor(|exec| {
3879 if let Some(m) = exec.assoc(&name) {
3880 m.keys().cloned().collect::<Vec<_>>()
3881 } else if let Some(arr) = exec.array(&name) {
3882 // zsh quirk: `(k)` on a regular array
3883 // returns the array values themselves.
3884 arr
3885 } else {
3886 // `${(k)<magic-assoc>}` — names like
3887 // `aliases`, `functions`, `options`,
3888 // `commands`, `terminfo`, `errnos`,
3889 // etc. Direct port of the per-special
3890 // scanfn dispatch (Src/Modules/
3891 // parameter.c et al.). Returns the
3892 // sorted key set the C source builds
3893 // by walking each magic table.
3894 crate::exec::scan_magic_assoc_keys(&name)
3895 .unwrap_or_default()
3896 }
3897 });
3898 state = St::A(keys);
3899 }
3900 }
3901 'v' => {
3902 // Values of assoc. If immediately followed by 'k',
3903 // interleave value/key pairs (zsh's `(vk)` form, less
3904 // common than `(kv)` but supported for symmetry).
3905 // Magic-assoc fallback when name isn't in
3906 // assoc_arrays (`aliases`, `functions`, `commands`,
3907 // `options`, `parameters`, `terminfo`, `errnos`,
3908 // `sysparams`) — synthesize the value list from the
3909 // executor's get_special_array_value scanfn-equivalent.
3910 if i < chars.len() && chars[i] == 'k' {
3911 i += 1; // consume the 'k'
3912 let pairs = with_executor(|exec| {
3913 if let Some(m) = exec.assoc(&name) {
3914 let mut out = Vec::with_capacity(m.len() * 2);
3915 for (k, v) in m {
3916 out.push(v.clone());
3917 out.push(k.clone());
3918 }
3919 out
3920 } else if let Some(keys) =
3921 crate::exec::scan_magic_assoc_keys(&name)
3922 {
3923 let mut out = Vec::with_capacity(keys.len() * 2);
3924 for k in keys {
3925 let v = exec
3926 .get_special_array_value(&name, &k)
3927 .unwrap_or_default();
3928 out.push(v);
3929 out.push(k);
3930 }
3931 out
3932 } else {
3933 Vec::new()
3934 }
3935 });
3936 state = St::A(pairs);
3937 } else {
3938 let vals = with_executor(|exec| {
3939 if let Some(m) = exec.assoc(&name) {
3940 m.values().cloned().collect::<Vec<_>>()
3941 } else if let Some(keys) =
3942 crate::exec::scan_magic_assoc_keys(&name)
3943 {
3944 keys.iter()
3945 .map(|k| {
3946 exec.get_special_array_value(&name, k)
3947 .unwrap_or_default()
3948 })
3949 .collect()
3950 } else {
3951 Vec::new()
3952 }
3953 });
3954 state = St::A(vals);
3955 }
3956 }
3957 '#' => {
3958 state = match state {
3959 St::A(a) => St::S(a.len().to_string()),
3960 St::S(s) => St::S(s.len().to_string()),
3961 };
3962 }
3963 'q' => {
3964 // (q) quoting flag — direct port of `case 'q':` in
3965 // Src/subst.c:2235-2253. zsh accepts ONLY:
3966 // q backslash-escape (QT_BACKSLASH)
3967 // qq single-bslashquote (QT_SINGLE)
3968 // qqq double-bslashquote (QT_DOUBLE)
3969 // qqqq $'…' ANSI-C (QT_DOLLARS)
3970 // q- QT_SINGLE_OPTIONAL (single-bslashquote if needed)
3971 // q+ QT_QUOTEDZPUTS (quotedzputs() format)
3972 // No `q*`, no `q!`, and crucially no `q:str:` delimiter
3973 // form — those were bot-invented extensions. The
3974 // `q:str:` arm in particular treated `@` as a delimiter
3975 // (since `@` is non-alphanumeric so `is_zsh_flag_delim`
3976 // returned true), capturing `explicit_delim=Some("")`
3977 // and then `s.replace("", "\\")` inserted `\` between
3978 // every char. That broke `${(qqqq@)arr}` and any other
3979 // q-flag combined with a flag-letter that's also non-
3980 // alphanumeric. Reference: zsh has no q-delimiter form.
3981 let mut level = 1;
3982 while i < chars.len() && chars[i] == 'q' && level < 4 {
3983 level += 1;
3984 i += 1;
3985 }
3986 let mut strip_trailing_newlines = false;
3987 let mut wrap_only_if_needed = false;
3988 let escape_glob_chars = false; // c:2235 (no q* in zsh)
3989 let explicit_delim: Option<String> = None; // c:2235 (no q:str: in zsh)
3990 while i < chars.len() {
3991 match chars[i] {
3992 '+' => {
3993 // c:2245-2246 — q+ → QT_QUOTEDZPUTS. Mapped
3994 // to wrap-only-if-needed pending a faithful
3995 // QT_QUOTEDZPUTS port.
3996 wrap_only_if_needed = true;
3997 i += 1;
3998 }
3999 '-' => {
4000 // c:2245-2246 — q- → QT_SINGLE_OPTIONAL.
4001 // Currently mapped to strip_trailing_newlines
4002 // pending a faithful QT_SINGLE_OPTIONAL port.
4003 strip_trailing_newlines = true;
4004 i += 1;
4005 }
4006 _ => break,
4007 }
4008 }
4009 let needs_quoting = |s: &str| -> bool {
4010 s.is_empty()
4011 || s.chars().any(|c| {
4012 c.is_whitespace()
4013 || matches!(
4014 c,
4015 '\'' | '"'
4016 | '\\'
4017 | '$'
4018 | '`'
4019 | '*'
4020 | '?'
4021 | '['
4022 | ']'
4023 | '{'
4024 | '}'
4025 | '('
4026 | ')'
4027 | '|'
4028 | '&'
4029 | ';'
4030 | '<'
4031 | '>'
4032 | '#'
4033 | '~'
4034 )
4035 })
4036 };
4037 let quote_one = |raw: &str| -> String {
4038 let s_owned: String;
4039 let s = if strip_trailing_newlines {
4040 s_owned = raw.trim_end_matches('\n').to_string();
4041 s_owned.as_str()
4042 } else {
4043 raw
4044 };
4045 if wrap_only_if_needed {
4046 // q+: skip quoting if the value is "shell-safe";
4047 // otherwise wrap with single-quotes (zsh's q+
4048 // promotes to single-bslashquote level when needed).
4049 if !needs_quoting(s) {
4050 return s.to_string();
4051 }
4052 return format!("'{}'", s.replace('\'', "'\\''"));
4053 }
4054 if let Some(ref d) = explicit_delim {
4055 // q:str: form — wrap value with the explicit
4056 // delimiter on each side, escaping inner d's
4057 // with backslash.
4058 let escaped = s.replace(d.as_str(), &format!("\\{}", d));
4059 return format!("{}{}{}", d, escaped, d);
4060 }
4061 match level {
4062 1 => {
4063 // q: backslash-escape every shell-special
4064 // char without surrounding quotes. zsh
4065 // special-cases the empty string: `${(q)x}`
4066 // for empty `x` outputs `''` (a real
4067 // single-quoted empty pair) so the
4068 // value survives word-splitting in the
4069 // consumer.
4070 if s.is_empty() {
4071 return "''".to_string();
4072 }
4073 let mut out = String::with_capacity(s.len() + 4);
4074 for c in s.chars() {
4075 if matches!(
4076 c,
4077 ' ' | '\t'
4078 | '\''
4079 | '"'
4080 | '\\'
4081 | '$'
4082 | '`'
4083 | '*'
4084 | '?'
4085 | '['
4086 | ']'
4087 | '{'
4088 | '}'
4089 | '('
4090 | ')'
4091 | '|'
4092 | '&'
4093 | ';'
4094 | '<'
4095 | '>'
4096 | '#'
4097 | '~'
4098 ) {
4099 out.push('\\');
4100 }
4101 out.push(c);
4102 }
4103 out
4104 }
4105 2 => {
4106 // qq: single-bslashquote, escape inner ' as '\''.
4107 let mut escaped = s.replace('\'', "'\\''");
4108 if escape_glob_chars {
4109 escaped = escaped.replace('*', "\\*").replace('?', "\\?");
4110 }
4111 format!("'{}'", escaped)
4112 }
4113 3 => {
4114 // qqq: double-bslashquote, escape $ ` " \\.
4115 let mut out = String::with_capacity(s.len() + 2);
4116 out.push('"');
4117 for c in s.chars() {
4118 match c {
4119 '$' | '`' | '"' | '\\' => {
4120 out.push('\\');
4121 out.push(c);
4122 }
4123 '*' | '?' if escape_glob_chars => {
4124 out.push('\\');
4125 out.push(c);
4126 }
4127 _ => out.push(c),
4128 }
4129 }
4130 out.push('"');
4131 out
4132 }
4133 _ => {
4134 // qqqq: ANSI-C $'…' style.
4135 let mut out = String::with_capacity(s.len() + 4);
4136 out.push_str("$'");
4137 for c in s.chars() {
4138 match c {
4139 '\\' => out.push_str("\\\\"),
4140 '\'' => out.push_str("\\'"),
4141 '\n' => out.push_str("\\n"),
4142 '\t' => out.push_str("\\t"),
4143 '\r' => out.push_str("\\r"),
4144 c if (c as u32) < 0x20 => {
4145 out.push_str(&format!("\\x{:02x}", c as u32));
4146 }
4147 c => out.push(c),
4148 }
4149 }
4150 out.push('\'');
4151 out
4152 }
4153 }
4154 };
4155 state = match state {
4156 St::S(s) => St::S(quote_one(&s)),
4157 St::A(a) => {
4158 // Empty array under `(q)`/`(qq)` flag emits a
4159 // single empty quoted pair (`''`) — zsh treats
4160 // the empty array as `[""]` for quoting so the
4161 // result still occupies a slot. Without this
4162 // special case, `${(qq)a}` for an empty `a`
4163 // produced an actually-empty string.
4164 if a.is_empty() {
4165 St::A(vec![quote_one("")])
4166 } else {
4167 St::A(a.into_iter().map(|s| quote_one(&s)).collect())
4168 }
4169 }
4170 };
4171 }
4172 'g' => {
4173 // Process backslash escapes (`\n`, `\t`, `\r`, `\\`,
4174 // `\xNN`, `\NNN` octal). Applied to the current scalar
4175 // or each array element.
4176 let unescape = |s: &str| -> String {
4177 let mut out = String::with_capacity(s.len());
4178 let mut chars = s.chars().peekable();
4179 while let Some(c) = chars.next() {
4180 if c != '\\' {
4181 out.push(c);
4182 continue;
4183 }
4184 match chars.next() {
4185 Some('n') => out.push('\n'),
4186 Some('t') => out.push('\t'),
4187 Some('r') => out.push('\r'),
4188 Some('\\') => out.push('\\'),
4189 Some('\'') => out.push('\''),
4190 Some('"') => out.push('"'),
4191 Some('0') => out.push('\0'),
4192 Some('a') => out.push('\x07'),
4193 Some('b') => out.push('\x08'),
4194 Some('f') => out.push('\x0c'),
4195 Some('v') => out.push('\x0b'),
4196 Some('x') => {
4197 let mut hex = String::new();
4198 for _ in 0..2 {
4199 if let Some(&h) = chars.peek() {
4200 if h.is_ascii_hexdigit() {
4201 hex.push(h);
4202 chars.next();
4203 } else {
4204 break;
4205 }
4206 }
4207 }
4208 if let Ok(b) = u8::from_str_radix(&hex, 16) {
4209 out.push(b as char);
4210 }
4211 }
4212 Some(other) => {
4213 out.push('\\');
4214 out.push(other);
4215 }
4216 None => out.push('\\'),
4217 }
4218 }
4219 out
4220 };
4221 state = match state {
4222 St::S(s) => St::S(unescape(&s)),
4223 St::A(a) => St::A(a.into_iter().map(|s| unescape(&s)).collect()),
4224 };
4225 }
4226 'n' => {
4227 // Numeric sort. Direct port of src/zsh/Src/sort.c:137-172
4228 // (eltpcmp's `if (sortnumeric)` block) and subst.c:2217
4229 // (case 'n' sets SORTIT_NUMERICALLY).
4230 //
4231 // Two flavors per zsh — controlled by sortnumeric value:
4232 // 1 (positive) — unsigned. A leading `-` is just
4233 // another non-digit char and is
4234 // compared lexicographically. (n)
4235 // alone takes this path.
4236 // -1 (negative) — signed. A `-` immediately preceding
4237 // digits flips the comparison so that
4238 // `-5 < -3 < 1`. Triggered by the
4239 // `-` flag char per subst.c:2220-2222
4240 // (case '-': sortit |= NUMERICALLY_SIGNED).
4241 //
4242 // We pre-scan the flag string for a literal `-` after
4243 // the `n` to enable signed mode. This matches the order-
4244 // independent behavior of zsh's flag dispatch (any
4245 // `-` in the (...) group enables signed mode for the
4246 // numeric sort).
4247 let signed = chars.contains(&'-');
/// Orders two strings for the `(n)` numeric-sort flag.
///
/// * `signed == true`: when both strings are, in their entirety, an optional
///   `+`/`-` followed by ASCII digits, they are compared as signed integers
///   (`-5 < -3 < 1`, and `-0` equals `0`). Anything else falls through to
///   the unsigned comparison.
/// * Unsigned comparison walks both strings in parallel. Where both sides
///   are at an ASCII digit, the two whole digit runs are compared by value;
///   runs of equal value (`01` vs `1`) are skipped and the walk continues.
///   Everywhere else single characters are compared by code point, so a
///   leading `-` is just another character. A string that runs out first
///   sorts first.
///
/// Digit runs are compared as text (leading zeros stripped, then length,
/// then lexicographically) rather than parsed into a fixed-width integer,
/// so runs of any length order correctly instead of overflowing.
fn natural_cmp(a: &str, b: &str, signed: bool) -> std::cmp::Ordering {
    /// Splits `s` into its leading run of ASCII digits and the remainder.
    fn split_digits(s: &str) -> (&str, &str) {
        let end = s
            .bytes()
            .position(|b| !b.is_ascii_digit())
            .unwrap_or(s.len());
        s.split_at(end)
    }

    /// Compares two all-digit strings by numeric value, at any length.
    fn cmp_digit_runs(x: &str, y: &str) -> Ordering {
        let x = x.trim_start_matches('0');
        let y = y.trim_start_matches('0');
        x.len().cmp(&y.len()).then_with(|| x.cmp(y))
    }

    /// Parses `[+-]digits` covering the whole string into
    /// `(is_negative, digits)`; a negative zero is reported as non-negative.
    fn parse_signed(s: &str) -> Option<(bool, &str)> {
        let (neg, digits) = match s.as_bytes().first()? {
            b'-' => (true, &s[1..]),
            b'+' => (false, &s[1..]),
            _ => (false, s),
        };
        if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        let is_zero = digits.bytes().all(|b| b == b'0');
        Some((neg && !is_zero, digits))
    }

    if signed {
        if let (Some((neg_a, mag_a)), Some((neg_b, mag_b))) = (parse_signed(a), parse_signed(b)) {
            return match (neg_a, neg_b) {
                (false, false) => cmp_digit_runs(mag_a, mag_b),
                // Both negative: the larger magnitude is the smaller value.
                (true, true) => cmp_digit_runs(mag_b, mag_a),
                (true, false) => Ordering::Less,
                (false, true) => Ordering::Greater,
            };
        }
        // Not both plain signed integers: fall through to the unsigned walk.
    }

    let (mut rest_a, mut rest_b) = (a, b);
    loop {
        let (ca, cb) = match (rest_a.chars().next(), rest_b.chars().next()) {
            (None, None) => return Ordering::Equal,
            (None, Some(_)) => return Ordering::Less,
            (Some(_), None) => return Ordering::Greater,
            (Some(ca), Some(cb)) => (ca, cb),
        };
        if ca.is_ascii_digit() && cb.is_ascii_digit() {
            let (run_a, tail_a) = split_digits(rest_a);
            let (run_b, tail_b) = split_digits(rest_b);
            match cmp_digit_runs(run_a, run_b) {
                Ordering::Equal => {
                    rest_a = tail_a;
                    rest_b = tail_b;
                }
                ord => return ord,
            }
        } else {
            match ca.cmp(&cb) {
                Ordering::Equal => {
                    rest_a = &rest_a[ca.len_utf8()..];
                    rest_b = &rest_b[cb.len_utf8()..];
                }
                ord => return ord,
            }
        }
    }
}
4321 state = match state {
4322 St::A(mut a) => {
4323 a.sort_by(|x, y| natural_cmp(x, y, signed));
4324 St::A(a)
4325 }
4326 s => s,
4327 };
4328 }
4329 '-' => {
4330 // `(-)` — signed-numeric sort modifier per
4331 // src/zsh/Src/subst.c:2220-2222. The actual sort
4332 // happens in the `n` arm above; this arm just
4333 // consumes the flag char so unrecognized-flag
4334 // paths don't trip on it.
4335 }
4336 'i' => {
4337 // Case-insensitive sort. Re-applies sort using lowercase
4338 // comparison; if the array isn't sorted, this is the
4339 // sort-key.
4340 state = match state {
4341 St::A(mut a) => {
4342 a.sort_by_key(|x| x.to_lowercase());
4343 St::A(a)
4344 }
4345 s => s,
4346 };
4347 }
4348 't' => {
4349 // Type query. zsh's `(t)` flag returns the base
4350 // type plus any attribute markers separated by `-`.
4351 // Examples: `integer`, `float`, `scalar-readonly`,
4352 // `scalar-export`, `scalar-left` (typeset -L N),
4353 // `scalar-right_blanks`, `array`, `association`.
4354 //
4355 // `(Pt)` combo: direct port of Src/subst.c:2807-2854.
4356 // zsh's `wantt` reads `v->pm->node.flags` AFTER
4357 // `aspar` has resolved the indirect target's Param.
4358 // We mirror that: for (Pt), look up `name`'s scalar
4359 // value to get the target name, then introspect
4360 // THAT parameter's type. The value pre-walker was
4361 // skipped above for the Pt combo.
4362 let target = if pt_combo {
4363 with_executor(|exec| exec.get_variable(&name))
4364 } else {
4365 name.clone()
4366 };
4367 let kind = with_executor(|exec| {
4368 // Delegate to the canonical (t)-flag formatter
4369 // which reads PM_TYPE flags from paramtab. The
4370 // exec.rs "parameters" arm of get_special_array
4371 // _value handles the same PM_INTEGER / PM_FFLOAT
4372 // / PM_LOWER / PM_READONLY flag dispatch.
4373 exec.get_special_array_value("parameters", &target)
4374 .unwrap_or_default()
4375 });
4376 state = St::S(kind);
4377 }
4378 '%' => {
4379 // Prompt expansion: process %F %B %f %{ %} etc. via the
4380 // executor's expand_prompt. Useful for building prompts
4381 // out of stored fragments.
4382 state = match state {
4383 St::S(s) => St::S(with_executor(|exec| exec.expand_prompt_string(&s))),
4384 St::A(a) => St::A(
4385 a.into_iter()
4386 .map(|s| with_executor(|exec| exec.expand_prompt_string(&s)))
4387 .collect(),
4388 ),
4389 };
4390 }
4391 'e' => {
4392 // Per zshexpn(1): "perform parameter expansion,
4393 // command substitution and arithmetic expansion
4394 // on the resulting word". Apply expand_string so
4395 // `\$var` (literal `$var` in the value) becomes
4396 // the value of $var, `\$(cmd)` runs the cmd, etc.
4397 let eval_one =
4398 |s: &str| -> String { crate::ported::subst::singsub(s) };
4399 state = match state {
4400 St::S(s) => St::S(eval_one(&s)),
4401 St::A(a) => St::A(a.into_iter().map(|s| eval_one(&s)).collect()),
4402 };
4403 }
4404 'p' => {
4405 // Print-style escape processing (mirrors print -e). Same
4406 // as `g` for the escape set we support — they differ in
4407 // zsh on some niche `\c` and `\E` forms, which we map
4408 // identically.
4409 let unescape = |s: &str| -> String {
4410 let mut out = String::with_capacity(s.len());
4411 let mut chars = s.chars().peekable();
4412 while let Some(c) = chars.next() {
4413 if c != '\\' {
4414 out.push(c);
4415 continue;
4416 }
4417 match chars.next() {
4418 Some('n') => out.push('\n'),
4419 Some('t') => out.push('\t'),
4420 Some('r') => out.push('\r'),
4421 Some('\\') => out.push('\\'),
4422 Some('e') | Some('E') => out.push('\x1b'),
4423 Some(other) => {
4424 out.push('\\');
4425 out.push(other);
4426 }
4427 None => out.push('\\'),
4428 }
4429 }
4430 out
4431 };
4432 state = match state {
4433 St::S(s) => St::S(unescape(&s)),
4434 St::A(a) => St::A(a.into_iter().map(|s| unescape(&s)).collect()),
4435 };
4436 }
4437 'A' => {
4438 // Coerce to array shape (alias of @). Mostly affects
4439 // downstream flags that treat scalar vs array
4440 // differently.
4441 state = match state {
4442 St::S(s) => St::A(vec![s]),
4443 a => a,
4444 };
4445 }
4446 '~' => {
4447 // Pattern-toggle: in zsh this enables glob-pattern
4448 // interpretation of the value in subsequent matches. The
4449 // bytecode dispatch already glob-matches via `Op::StrMatch`
4450 // when relevant; without a stateful match-context this
4451 // flag is a no-op pass-through. tracing::debug records
4452 // the request.
4453 tracing::debug!("PARAM_FLAG ~ — no-op pass-through (no match-context state)");
4454 }
4455 'p' => {
4456 // `(p)` — print-style escapes for OTHER flag args.
4457 // Already detected by the pre-scan above; here we
4458 // just consume the flag char without mutating
4459 // state (no-op on the value itself). Matches
4460 // src/zsh/Src/subst.c:2381-2382.
4461 }
4462 'g' => {
4463 // `(g)` — apply print-style escape decoding to
4464 // the operand value itself, with sub-flags
4465 // selecting which escape conventions to honor.
4466 // Sub-flags from src/zsh/Src/subst.c:2409-2436:
4467 // e — emacs-style: \C-x, \M-x, \e
4468 // o — octal: \NNN
4469 // c — caret notation: ^X for control chars
4470 // We honor any combination by running the same
4471 // C-style interpreter that `(p)` uses on `(s::)`
4472 // args; sub-flags currently widen but do not
4473 // narrow the escape set.
4474 if i < chars.len() && ZshrsHost::is_zsh_flag_delim(chars[i]) {
4475 let d = chars[i];
4476 i += 1;
4477 // Consume the sub-flag chars (e/o/c) — recorded
4478 // for documentation; the escape interpreter
4479 // below already handles all three cases.
4480 while i < chars.len() && chars[i] != d {
4481 i += 1;
4482 }
4483 if i < chars.len() {
4484 i += 1; // skip closing delim
4485 }
4486 }
4487 state = match state {
4488 St::S(s) => St::S(print_escape_str(&s)),
4489 St::A(a) => St::A(a.into_iter().map(|s| print_escape_str(&s)).collect()),
4490 };
4491 }
4492 '_' => {
4493 // `(_)` — reserved for future use per
4494 // src/zsh/Src/subst.c:2485-2502. Consume the
4495 // delim-bracketed arg if present so we don't
4496 // mis-parse subsequent flags.
4497 if i < chars.len() && ZshrsHost::is_zsh_flag_delim(chars[i]) {
4498 let d = chars[i];
4499 i += 1;
4500 while i < chars.len() && chars[i] != d {
4501 i += 1;
4502 }
4503 if i < chars.len() {
4504 i += 1;
4505 }
4506 }
4507 }
4508 'b' | 'B' => {
4509 // (b)/(B) — backslash-escape shell + pattern metas
4510 // (whitespace, glob/redirect/bslashquote/expansion specials).
4511 let escape = |s: &str| -> String {
4512 let mut r = String::new();
4513 for c in s.chars() {
4514 if "\\*?[]{}()<>&|;\"'$`!#~ \t\n".contains(c) {
4515 r.push('\\');
4516 }
4517 r.push(c);
4518 }
4519 r
4520 };
4521 state = match state {
4522 St::S(s) => St::S(escape(&s)),
4523 St::A(a) => St::A(a.iter().map(|s| escape(s)).collect()),
4524 };
4525 }
4526 _ => {
4527 // Unknown flag — silently skip. The maintainer's "no
4528 // friendly nags" rule means we don't print "unsupported
4529 // flag X"; tracing::debug records it in the log.
4530 tracing::debug!(flag = %c, "BUILTIN_PARAM_FLAG: unknown flag");
4531 }
4532 }
4533 }
4534
4535 // Direct port of Src/subst.c:3901-3933. When the caller is in
4536 // DQ context AND the state landed in `St::A` (e.g. via `(f)`
4537 // line-split, `(s:…:)` arbitrary split, or assoc/array seed
4538 // with no `[@]` splice), zsh's paramsubst joins the array back
4539 // into a single scalar via `sepjoin(aval, sep, 1)`:
4540 //
4541 // • If `sep` is non-NULL (set by `(F)` / `(j:…:)`), join
4542 // with that exact separator.
4543 // • Else if `spsep` is non-NULL (set by `(f)` / `(s:…:)`),
4544 // `sepjoin` falls back to the first IFS char (space by
4545 // default for `IFS=$' \t\n'`).
4546 //
4547 // Without this, `echo "[${(f)x}]"` (DQ) would word-split the
4548 // array into 3 separate echo args (`[line1] [line2] [line3]`)
4549 // instead of zsh's `[line1 line2 line3]`. The explicit `[@]`
4550 // splice operator OR `(@)` flag suppresses this collapse —
4551 // both already covered by `has_at_subscript` above.
4552 //
4553 // Skip the collapse when nested inside ANOTHER `${...}` —
4554 // `${${(f)x}[2]}` needs the inner `(f)` to keep its array
4555 // shape so the outer `[2]` can subscript element-2. C zsh
4556 // tracks this through paramsubst's recursion (the inner call
4557 // returns aval; outer operates on aval before any sepjoin).
4558 // We detect the same condition via `in_paramsubst_nest`,
4559 // bumped by every BUILTIN_PARAM_FLAG / BUILTIN_PARAM_*
4560 // recursion entry.
4561 // The DQ collapse fires only for "bare" arrays — those that
4562 // came from `${arr}` / `${assoc}` without a split flag. When
4563 // any split flag (`(z)`, `(f)`, `(s:STR:)`, `(0)`, `(=)`) was
4564 // applied the array shape is INTENTIONAL: zsh keeps it
4565 // multi-word inside DQ. Direct port of Src/subst.c's
4566 // `nojoin` behavior — the split flags set nojoin=1 which
4567 // causes paramsubst to skip sepjoin even in DQ.
4568 let split_flag_active = flags.contains('z')
4569 || flags.contains('f')
4570 || flags.contains('s')
4571 || flags.contains('0')
4572 || flags.contains('=');
4573 // Canonical paramsubst-nest counter — `IN_PARAMSUBST_NEST`
4574 // thread_local in `subst.rs` (mirrors `paramsub_nest` global
4575 // in `Src/subst.c`).
4576 let is_nested = crate::ported::subst::IN_PARAMSUBST_NEST
4577 .with(|c| c.get() > 1);
4578 if (dq_compile || dq_runtime) && !has_at_subscript && !is_nested && !split_flag_active {
4579 if let St::A(a) = state {
4580 // Pick the join separator. `(F)` (the last F seen) is
4581 // tracked via `flags.contains('F')`; `(j:str:)` runs
4582 // earlier in the loop and stores the result already
4583 // joined as `St::S(_)`, so we only see `St::A` here
4584 // for split-style flags. The default is the first
4585 // char of $IFS (space when IFS is the zsh default).
4586 let sep = if flags.contains('F') {
4587 "\n".to_string()
4588 } else {
4589 with_executor(|exec| {
4590 let ifs = exec.get_variable("IFS");
4591 ifs.chars().next().map(|c| c.to_string()).unwrap_or_else(|| " ".to_string())
4592 })
4593 };
4594 return Value::str(a.join(&sep));
4595 }
4596 }
4597
4598 match state {
4599 St::S(s) => Value::str(s),
4600 St::A(a) => Value::Array(a.into_iter().map(Value::str).collect()),
4601 }
4602 });
4603
4604 // `foo[key]=val` — single-key set on an assoc array. Stack: [name, key, value].
4605 vm.register_builtin(BUILTIN_SET_ASSOC, |vm, _argc| {
4606 let value = vm.pop().to_str();
4607 let key = vm.pop().to_str();
4608 let name = vm.pop().to_str();
4609 with_executor(|exec| {
4610 // PFA-SMR aspect: subscript assignment `arr[N]=val` /
4611 // `assoc[key]=val`. Recorded as a structured assoc/array
4612 // event with the (key, value) pair preserved in
4613 // `value_assoc` so replay can reconstruct the exact slot.
4614 // Path-family arrays come through SET_ARRAY / APPEND_ARRAY,
4615 // never here, so no path_mod routing.
4616 #[cfg(feature = "recorder")]
4617 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
4618 let ctx = exec.recorder_ctx();
4619 let attrs = exec.recorder_attrs_for(&name);
4620 crate::recorder::emit_assoc_assign(
4621 &name,
4622 vec![(key.clone(), value.clone())],
4623 attrs,
4624 true, // element-add semantics, not full replace
4625 ctx,
4626 );
4627 }
4628 // Indexed array element assign `a[N]=val`. Routes here when
4629 // `name` is already an indexed array. For unset names, only
4630 // treat as indexed if the key is unambiguously numeric (a
4631 // literal int) — `foo[key]=val` with no prior storage and
4632 // a string key should create an assoc (zsh default), not an
4633 // indexed array. zsh's rule: numeric subscript on an
4634 // indexed array (or new var with numeric key) assigns to
4635 // the 1-based slot, growing the array if needed. Negative
4636 // indices count from the end.
4637 let is_indexed = exec.array(&name).is_some();
4638 let is_assoc = exec.assoc(&name).is_some();
4639 let key_literal_int = key.trim().parse::<i64>().ok();
4640 // For an existing indexed array, fall back to arith eval so
4641 // `a[i+1]=v` works when `i` is set.
4642 let key_int_for_indexed = if is_indexed {
4643 key_literal_int.or_else(|| Some(crate::ported::math::mathevali(&crate::ported::subst::singsub(&key)).unwrap_or(0)))
4644 } else {
4645 key_literal_int
4646 };
4647 let route_indexed = if is_assoc {
4648 false
4649 } else if is_indexed {
4650 key_int_for_indexed.is_some()
4651 } else {
4652 key_literal_int.is_some()
4653 };
4654 if let (true, Some(i)) = (route_indexed, key_int_for_indexed) {
4655 let len = exec.array(&name).map(|a| a.len() as i64).unwrap_or(0);
4656 let idx = if i > 0 {
4657 (i - 1) as usize
4658 } else if i < 0 {
4659 let off = len + i;
4660 if off < 0 {
4661 return;
4662 }
4663 off as usize
4664 } else {
4665 // zsh: `a[0]=v` is "assignment to invalid subscript
4666 // range" (positionals/arrays are 1-based). Mirror
4667 // the diagnostic and abort with status 1.
4668 eprintln!("zshrs:1: {}: assignment to invalid subscript range", name);
4669 std::process::exit(1);
4670 };
4671 // Read paramtab-first, mutate, write back via
4672 // canonical set_array so the assignment is visible
4673 // to both the legacy cache and paramtab.
4674 let mut arr = exec.array(&name).unwrap_or_default();
4675 while arr.len() <= idx {
4676 arr.push(String::new());
4677 }
4678 arr[idx] = value;
4679 exec.set_array(name, arr);
4680 return;
4681 }
4682 // Default: assoc set.
4683 exec.unset_scalar(&name);
4684 let mut map = exec.assoc(&name).unwrap_or_default();
4685 map.insert(key, value);
4686 exec.set_assoc(name, map);
4687 });
4688 Value::Status(0)
4689 });
4690
// Word splitting on $IFS. Pops one scalar and returns Value::Array (or a
// single scalar when exactly one word results). Brace expansion is handled
// by BUILTIN_BRACE_EXPAND below, which calls glob::xpandbraces directly.
4693 vm.register_builtin(BUILTIN_WORD_SPLIT, |vm, _argc| {
4694 let s = vm.pop().to_str();
4695 let ifs = with_executor(|exec| {
4696 exec.scalar("IFS")
4697 .unwrap_or_else(|| " \t\n".to_string())
4698 });
4699 // Direct port of multsub's IFS-split path (src/zsh/Src/subst.c:
4700 // 567-680). zsh distinguishes WHITESPACE IFS (default) from
4701 // NON-WHITESPACE IFS:
4702 // - whitespace IFS chars (space/tab/newline): runs of separator
4703 // collapse and empty fields are SUPPRESSED
4704 // - non-whitespace IFS chars: every separator boundary creates a
4705 // field, including empties between adjacent separators
4706 // Mixed IFS treats whitespace runs as collapsing, but a single
4707 // non-whitespace IFS character creates a field boundary regardless.
4708 // zsh's default IFS is " \t\n\0" (space, tab, newline, NUL).
4709 // Treat NUL as whitespace-class so the default-IFS path
4710 // collapses runs and suppresses empties; without this the
4711 // NUL char triggered the non-whitespace branch and emitted
4712 // empty fields between every separator.
4713 let only_ws = ifs.chars().all(|c| matches!(c, ' ' | '\t' | '\n' | '\0'));
4714 let parts: Vec<fusevm::Value> = if only_ws {
4715 s.split(|c: char| ifs.contains(c))
4716 .filter(|p| !p.is_empty())
4717 .map(fusevm::Value::str)
4718 .collect()
4719 } else {
4720 // Non-whitespace IFS: preserve every separator boundary,
4721 // including empty fields. Matches zsh's behaviour for
4722 // `IFS=:; ${=a}` on `x:y::z` -> [x, y, "", z].
4723 s.split(|c: char| ifs.contains(c))
4724 .map(fusevm::Value::str)
4725 .collect()
4726 };
4727 // zsh: word-splitting an empty value yields ZERO words, not one
4728 // empty word. `unset b; for w in ${=b}` iterates zero times.
4729 // Whitespace-IFS path filtered out the empties already; the
4730 // non-whitespace path may have produced a single-empty Vec from
4731 // `"".split(...)` which still iterates once — collapse to an
4732 // empty Array so for-loops and arg expansion see no words.
4733 if parts.is_empty() || (parts.len() == 1 && parts[0].to_str().is_empty()) {
4734 fusevm::Value::Array(Vec::new())
4735 } else if parts.len() == 1 {
4736 parts.into_iter().next().unwrap()
4737 } else {
4738 fusevm::Value::Array(parts)
4739 }
4740 });
4741
4742 vm.register_builtin(BUILTIN_BRACE_EXPAND, |vm, _argc| {
4743 let s = vm.pop().to_str();
4744 // Direct call to the canonical brace expander (port of
4745 // Src/glob.c::xpandbraces at glob.rs:1678). Was stubbed
4746 // as `vec![s]` — every `print X{1,2,3}Y` returned literal.
4747 let brace_ccl = with_executor(|exec|
4748 crate::ported::options::opt_state_get("braceccl").unwrap_or(false));
4749 let parts = crate::ported::glob::xpandbraces(&s, brace_ccl);
4750 if parts.len() == 1 {
4751 fusevm::Value::str(parts.into_iter().next().unwrap_or_default())
4752 } else {
4753 fusevm::Value::Array(parts.into_iter().map(fusevm::Value::str).collect())
4754 }
4755 });
4756
4757 // `[[ s =~ pat ]]` regex match — extra-builtin fallback path so the
4758 // conditional grammar can route here when Op::RegexMatch isn't wired.
4759 // Uses the same regex cache as the host method.
4760 vm.register_builtin(BUILTIN_REGEX_MATCH, |vm, _argc| {
4761 let pat = vm.pop().to_str();
4762 let s = vm.pop().to_str();
4763 // Same untokenize before regex compile as ZshrsHost::regex_match
4764 // — Snull/DQ markers from quoted patterns must be stripped
4765 // before the regex engine sees them. Direct port of
4766 // bin_test/cond_match's untokenize() call.
4767 let pat = crate::lex::untokenize(&pat);
4768 let s = crate::lex::untokenize(&s);
4769 let mut cache = REGEX_CACHE.lock();
4770 let matched = if let Some(re) = cache.get(&pat) {
4771 re.is_match(&s)
4772 } else {
4773 match regex::Regex::new(&pat) {
4774 Ok(re) => {
4775 let m = re.is_match(&s);
4776 cache.insert(pat.clone(), re);
4777 m
4778 }
4779 Err(_) => false,
4780 }
4781 };
4782 if matched {
4783 Value::Status(0)
4784 } else {
4785 Value::Status(1)
4786 }
4787 });
4788
4789 // `*(qual)` glob qualifier filter. Stack: [pattern, qualifier].
4790 // Pattern is glob-expanded normally, then each result is filtered by the
4791 // qualifier predicate. Common qualifiers:
4792 // . — regular files only
4793 // / — directories only
4794 // @ — symlinks
4795 // x — executable
4796 // r/w/x — readable/writable/executable
4797 // N — nullglob (no error if no match)
4798 // L+N / L-N — size > N / size < N (in bytes)
4799 // mh-N / mh+N — modified within N hours / older than N hours
4800 // md-N / md+N — modified within N days / older than N days
4801 // on/On — sort by name asc/desc (default)
4802 // oL/OL — sort by length
4803 // om/Om — sort by mtime
4804 // Pop a scalar pattern, run expand_glob, push Value::Array. Used
4805 // by the segment-concat compile path for `$D/*`-style words.
4806 vm.register_builtin(BUILTIN_GLOB_EXPAND, |vm, _argc| {
4807 let pattern = vm.pop().to_str();
4808 let matches = with_executor(|exec| exec.expand_glob(&pattern));
4809 if matches.is_empty() {
4810 // expand_glob handles NOMATCH internally; if it returns
4811 // empty here, nullglob was on. Yield empty array.
4812 return fusevm::Value::Array(Vec::new());
4813 }
4814 if matches.len() == 1 && matches[0] == pattern {
4815 // No real matches; expand_glob returned the literal. Pass
4816 // back as scalar so downstream ops don't re-flatten.
4817 return fusevm::Value::str(pattern);
4818 }
4819 fusevm::Value::Array(matches.into_iter().map(fusevm::Value::str).collect())
4820 });
4821
4822 vm.register_builtin(BUILTIN_GLOB_QUALIFIED, |vm, _argc| {
4823 let qual = vm.pop().to_str();
4824 let pattern = vm.pop().to_str();
4825 let nullglob = qual.contains('N');
4826 let mut matches = with_executor(|exec| exec.expand_glob(&pattern));
4827 if matches.is_empty() && !nullglob {
4828 // Default: keep the unmatched pattern (zsh's default unless N is set)
4829 return fusevm::Value::Array(vec![fusevm::Value::str(pattern)]);
4830 }
4831 // Filter by predicates that require stat
4832 matches.retain(|path| {
4833 // zsh's `-` modifier in glob qualifiers (`*(-.)`) means
4834 // "follow symlinks before applying the test". Without
4835 // `-`, `(.)` uses lstat (skipping symlinks even when
4836 // they target a regular file). Direct port of zsh's
4837 // pattern.c qualifier parser — the QUAL_NULL bit is set
4838 // by `-` and switches stat→lstat-vs-stat. Default Rust
4839 // `fs::metadata` follows symlinks; use `symlink_metadata`
4840 // by default, switch to `metadata` when `-` is in the
4841 // qualifier set.
4842 let follow_symlinks = qual.contains('-');
4843 let meta_res = if follow_symlinks {
4844 fs::metadata(path)
4845 } else {
4846 fs::symlink_metadata(path)
4847 };
4848 let meta = match meta_res {
4849 Ok(m) => m,
4850 Err(_) => return qual.contains('N'),
4851 };
4852 let mut keep = true;
4853 for c in qual.chars() {
4854 match c {
4855 '.' => keep &= meta.is_file(),
4856 '/' => keep &= meta.is_dir(),
4857 '@' => {
4858 // is_symlink requires fs::symlink_metadata for the
4859 // path itself, not the target.
4860 keep &= fs::symlink_metadata(path)
4861 .map(|m| m.file_type().is_symlink())
4862 .unwrap_or(false);
4863 }
4864 'x' => {
4865 keep &= meta.permissions().mode() & 0o111 != 0;
4866 }
4867 'r' => {
4868 keep &= meta.permissions().mode() & 0o444 != 0;
4869 }
4870 'w' => {
4871 keep &= meta.permissions().mode() & 0o222 != 0;
4872 }
4873 _ => {}
4874 }
4875 if !keep {
4876 break;
4877 }
4878 }
4879 keep
4880 });
4881 // Sort modifiers
4882 if qual.contains("on") || qual.contains('o') && !qual.contains("om") && !qual.contains("oL")
4883 {
4884 matches.sort();
4885 }
4886 if qual.contains("On")
4887 || (qual.contains('O') && !qual.contains("Om") && !qual.contains("OL"))
4888 {
4889 matches.sort();
4890 matches.reverse();
4891 }
4892 if qual.contains("oL") {
4893 matches.sort_by_key(|p| std::fs::metadata(p).map(|m| m.len()).unwrap_or(0));
4894 }
4895 if qual.contains("OL") {
4896 matches.sort_by_key(|p| {
4897 std::cmp::Reverse(std::fs::metadata(p).map(|m| m.len()).unwrap_or(0))
4898 });
4899 }
4900 if qual.contains("om") {
4901 matches.sort_by_key(|p| {
4902 std::fs::metadata(p)
4903 .and_then(|m| m.modified())
4904 .map(|t| {
4905 std::cmp::Reverse(
4906 t.duration_since(std::time::UNIX_EPOCH)
4907 .map(|d| d.as_secs())
4908 .unwrap_or(0),
4909 )
4910 })
4911 .unwrap_or(std::cmp::Reverse(0))
4912 });
4913 }
4914 if qual.contains("Om") {
4915 matches.sort_by_key(|p| {
4916 std::fs::metadata(p)
4917 .and_then(|m| m.modified())
4918 .map(|t| {
4919 t.duration_since(std::time::UNIX_EPOCH)
4920 .map(|d| d.as_secs())
4921 .unwrap_or(0)
4922 })
4923 .unwrap_or(0)
4924 });
4925 }
4926 // (M) mark-dirs / (T) list-types qualifiers — direct port of
4927 // zsh/Src/glob.c:1557-1566 (case 'M' / case 'T'). zsh appends
4928 // a single char to each output (or only to dirs for `M`):
4929 // / directory * executable regular file
4930 // @ symlink | fifo
4931 // = socket # block device % char device
4932 //
4933 // M alone marks ONLY directories with `/`; T marks every
4934 // file with its type char. Both sourced from glob.c:355,372
4935 // emit-side logic on gf_markdirs / gf_listtypes flags.
4936 let mark_dirs = qual.contains('M');
4937 let list_types = qual.contains('T');
4938 if mark_dirs || list_types {
4939 matches = matches
4940 .into_iter()
4941 .map(|p| {
4942 let meta = match std::fs::symlink_metadata(&p) {
4943 Ok(m) => m,
4944 Err(_) => return p,
4945 };
4946 let mode = meta.permissions().mode();
4947 let ch = crate::glob::file_type(mode);
4948 if list_types || (mark_dirs && ch == '/') {
4949 format!("{}{}", p, ch)
4950 } else {
4951 p
4952 }
4953 })
4954 .collect();
4955 }
4956 fusevm::Value::Array(matches.into_iter().map(fusevm::Value::str).collect())
4957 });
4958
4959 // `break`/`continue` from a sub-VM body. The compile path emits these
4960 // when the keyword appears at chunk top-level (no enclosing for/while in
4961 // the current chunk's patch lists). Outer-loop builtins (BUILTIN_RUN_
4962 // SELECT and any future loop-via-builtin construct) drain
4963 // executor.loop_signal after each iteration.
4964 vm.register_builtin(BUILTIN_SET_BREAK, |_vm, _argc| {
4965 with_executor(|exec| {
4966 exec.loop_signal = Some(LoopSignal::Break);
4967 });
4968 Value::Status(0)
4969 });
4970 vm.register_builtin(BUILTIN_SET_CONTINUE, |_vm, _argc| {
4971 with_executor(|exec| {
4972 exec.loop_signal = Some(LoopSignal::Continue);
4973 });
4974 Value::Status(0)
4975 });
4976
4977 // `m[k]+=tail` — append onto the existing value (string concat). Mirrors
4978 // zsh's += behavior on assoc-array entries. Missing key creates it with
4979 // just `tail`, matching SET_ASSOC's create-on-demand.
4980 vm.register_builtin(BUILTIN_APPEND_ASSOC, |vm, _argc| {
4981 let tail = vm.pop().to_str();
4982 let key = vm.pop().to_str();
4983 let name = vm.pop().to_str();
4984 with_executor(|exec| {
4985 exec.unset_scalar(&name);
4986 let mut map = exec.assoc(&name).unwrap_or_default();
4987 match map.get_mut(&key) {
4988 Some(existing) => existing.push_str(&tail),
4989 None => {
4990 map.insert(key.clone(), tail.clone());
4991 }
4992 }
4993 exec.set_assoc(name.clone(), map);
4994 // PFA-SMR aspect: assoc subscript-append `m[k]+=tail`.
4995 // Recorder emits a structured assoc event with the
4996 // POST-append value so replay reconstructs end state
4997 // directly (no need to model the +=tail concat).
4998 #[cfg(feature = "recorder")]
4999 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
5000 let ctx = exec.recorder_ctx();
5001 let attrs = exec.recorder_attrs_for(&name);
5002 let new_val = exec
5003 .assoc(&name)
5004 .and_then(|m| m.get(&key).cloned())
5005 .unwrap_or_default();
5006 crate::recorder::emit_assoc_assign(
5007 &name,
5008 vec![(key.clone(), new_val)],
5009 attrs,
5010 true,
5011 ctx,
5012 );
5013 }
5014 });
5015 Value::Status(0)
5016 });
5017
5018 vm.register_builtin(BUILTIN_ARRAY_LENGTH, |vm, _argc| {
5019 let name = vm.pop().to_str();
5020 let len = with_executor(|exec| exec.array(&name).map(|a| a.len()).unwrap_or(0));
5021 Value::str(len.to_string())
5022 });
5023
5024 // `${arr[*]}` — join array elements with the first IFS char into
5025 // a single string. Matches zsh: in DQ context this preserves the
5026 // join; in array context too the result is one Value::Str.
5027 // Set or clear a shell option directly. Used by `noglob CMD ...`
5028 // precommand wrapping — the compiler emits SET_RAW_OPT to flip the
5029 // option ON before compiling the inner words and OFF after, so glob
5030 // expansion of the inner args sees the temporary state.
5031 vm.register_builtin(BUILTIN_SET_RAW_OPT, |vm, _argc| {
5032 let on = vm.pop().to_int() != 0;
5033 let opt = vm.pop().to_str();
5034 with_executor(|exec| {
5035 if on {
5036 crate::ported::options::opt_state_set(&opt, true);
5037 } else {
5038 crate::ported::options::opt_state_unset(&opt);
5039 }
5040 });
5041 Value::Status(0)
5042 });
5043
5044 vm.register_builtin(BUILTIN_ARRAY_JOIN_STAR, |vm, _argc| {
5045 let name = vm.pop().to_str();
5046 let result = with_executor(|exec| {
5047 let sep = exec
5048 .scalar("IFS")
5049 .and_then(|s| s.chars().next())
5050 .map(|c| c.to_string())
5051 .unwrap_or_else(|| " ".to_string());
5052 if name == "@" || name == "*" || name == "argv" {
5053 return exec.pparams().join(&sep);
5054 }
5055 if let Some(arr) = exec.array(&name) {
5056 arr.join(&sep)
5057 } else {
5058 exec.get_variable(&name)
5059 }
5060 });
5061 fusevm::Value::str(result)
5062 });
5063
5064 vm.register_builtin(BUILTIN_ARRAY_ALL, |vm, _argc| {
5065 let name = vm.pop().to_str();
5066 with_executor(|exec| {
5067 // Special positional names — splice the positional list.
5068 if name == "@" || name == "*" || name == "argv" {
5069 return Value::Array(exec.pparams().iter().map(Value::str).collect());
5070 }
5071 match exec.array(&name) {
5072 Some(v) => Value::Array(v.iter().map(Value::str).collect()),
5073 None => {
5074 // Fall back to scalar lookup. zsh (unlike bash)
5075 // does NOT IFS-split a scalar variable in a for
5076 // list — `for w in $scalar` iterates ONCE with the
5077 // scalar value. Word-splitting requires either
5078 // sh_word_split option or explicit `${(s.,.)scalar}`.
5079 let val = exec.get_variable(&name);
5080 if val.is_empty()
5081 && !exec.has_scalar(&name)
5082 && std::env::var(&name).is_err()
5083 {
5084 Value::Array(vec![])
5085 } else if crate::ported::options::opt_state_get("shwordsplit").unwrap_or(false) {
5086 // bash-compat: under setopt sh_word_split, do
5087 // split scalars on IFS chars.
5088 let ifs = exec
5089 .scalar("IFS")
5090 .unwrap_or_else(|| " \t\n".to_string());
5091 let parts: Vec<Value> = val
5092 .split(|c: char| ifs.contains(c))
5093 .filter(|s| !s.is_empty())
5094 .map(Value::str)
5095 .collect();
5096 Value::Array(parts)
5097 } else {
5098 Value::Array(vec![Value::str(val)])
5099 }
5100 }
5101 }
5102 })
5103 });
5104
5105 // BUILTIN_ARRAY_FLATTEN(N): pops N values, flattens one level of Array
5106 // nesting, pushes the resulting Array AND its length as a separate Int.
5107 // The two-value return shape lets the caller (for-loop compile path)
5108 // SetSlot the length before SetSlot'ing the array, without re-deriving
5109 // the length from the array via a second builtin call.
5110 // `coproc [name] { body }` — bidirectional pipe to backgrounded body.
// Stack (bottom to top): [name (str, "" for default), sub_idx (int)];
// sub_idx is popped first. On success the parent's array `name` becomes
// [read_fd, write_fd] and Status(0) is returned. The caller writes to the
// child's stdin via write_fd, reads its stdout via read_fd, and closes
// both when done.
5115 //
5116 // Bash's coproc convention is `${NAME[0]}` = read_fd, `${NAME[1]}` =
5117 // write_fd. We follow that: arrays[name] = [read_fd_str, write_fd_str].
5118 vm.register_builtin(BUILTIN_RUN_COPROC, |vm, _argc| {
5119 let sub_idx = vm.pop().to_int() as usize;
5120 let raw_name = vm.pop().to_str();
5121 let name = if raw_name.is_empty() {
5122 "COPROC".to_string()
5123 } else {
5124 raw_name
5125 };
5126 let chunk = match vm.chunk.sub_chunks.get(sub_idx).cloned() {
5127 Some(c) => c,
5128 None => return Value::Status(1),
5129 };
5130
5131 // (parent_read ← child_stdout)
5132 let mut p2c = [0i32; 2]; // parent writes, child reads
5133 let mut c2p = [0i32; 2]; // child writes, parent reads
5134 if unsafe { libc::pipe(p2c.as_mut_ptr()) } < 0 {
5135 return Value::Status(1);
5136 }
5137 if unsafe { libc::pipe(c2p.as_mut_ptr()) } < 0 {
5138 unsafe {
5139 libc::close(p2c[0]);
5140 libc::close(p2c[1]);
5141 }
5142 return Value::Status(1);
5143 }
5144
5145 match unsafe { libc::fork() } {
5146 -1 => {
5147 unsafe {
5148 libc::close(p2c[0]);
5149 libc::close(p2c[1]);
5150 libc::close(c2p[0]);
5151 libc::close(c2p[1]);
5152 }
5153 Value::Status(1)
5154 }
5155 0 => {
5156 // Child: stdin from p2c[0], stdout to c2p[1]. Close all
5157 // unused fds. setsid so SIGINT to fg doesn't hit us.
5158 unsafe {
5159 libc::dup2(p2c[0], libc::STDIN_FILENO);
5160 libc::dup2(c2p[1], libc::STDOUT_FILENO);
5161 libc::close(p2c[0]);
5162 libc::close(p2c[1]);
5163 libc::close(c2p[0]);
5164 libc::close(c2p[1]);
5165 libc::setsid();
5166 }
5167 let mut co_vm = fusevm::VM::new(chunk);
5168 register_builtins(&mut co_vm);
5169 let _ = co_vm.run();
5170 let _ = std::io::stdout().flush();
5171 let _ = std::io::stderr().flush();
5172 std::process::exit(co_vm.last_status);
5173 }
5174 _pid => {
5175 // Parent: close child ends, store [read_fd, write_fd] in NAME.
5176 unsafe {
5177 libc::close(p2c[0]);
5178 libc::close(c2p[1]);
5179 }
5180 let read_fd = c2p[0];
5181 let write_fd = p2c[1];
5182 with_executor(|exec| {
5183 exec.unset_scalar(&name);
5184 exec.set_array(name, vec![read_fd.to_string(), write_fd.to_string()]);
5185 });
5186 Value::Status(0)
5187 }
5188 }
5189 });
5190
5191 vm.register_builtin(BUILTIN_ARRAY_FLATTEN, |vm, argc| {
5192 let n = argc as usize;
5193 let start = vm.stack.len().saturating_sub(n);
5194 let raw: Vec<fusevm::Value> = vm.stack.drain(start..).collect();
5195 let mut flat: Vec<fusevm::Value> = Vec::with_capacity(raw.len());
5196 for v in raw {
5197 match v {
5198 fusevm::Value::Array(items) => flat.extend(items),
5199 other => flat.push(other),
5200 }
5201 }
5202 let len = flat.len() as i64;
5203 // Push the array first; the Int(len) becomes the builtin's return
5204 // value (which CallBuiltin already pushes). Caller consumes in
5205 // reverse: SetSlot(len_slot) pops Int, SetSlot(arr_slot) pops Array.
5206 vm.push(fusevm::Value::Array(flat));
5207 fusevm::Value::Int(len)
5208 });
5209
5210 // Shell variable get/set — routes through executor.variables so nested
5211 // VMs (function calls) and tree-walker callers see the same storage.
5212 vm.register_builtin(BUILTIN_GET_VAR, |vm, argc| {
5213 let args = pop_args(vm, argc);
5214 let name = args.into_iter().next().unwrap_or_default();
5215 let live_status = vm.last_status;
5216 // `$@` and `$*` need splice semantics — return Value::Array of
5217 // positional params so for-loop's BUILTIN_ARRAY_FLATTEN spreads them
5218 // and pop_args splits them into argv slots. zsh's `"$@"` bslashquote-each-
5219 // word semantics matches: each pos-param becomes its own arg.
5220 // Same for arrays accessed by name (e.g. `$arr` in some contexts).
5221 let sync_status = |exec: &mut ShellExecutor| {
5222 exec.set_last_status(live_status);
5223 };
5224 if name == "@" || name == "*" {
5225 return with_executor(|exec| {
5226 sync_status(exec);
5227 fusevm::Value::Array(
5228 exec.pparams()
5229 .iter()
5230 .map(fusevm::Value::str)
5231 .collect(),
5232 )
5233 });
5234 }
5235 // RC_EXPAND_PARAM: when the option is set and `name` refers to
5236 // an array, return Value::Array so the enclosing word's
5237 // BUILTIN_CONCAT_DISTRIBUTE distributes element-wise. Without
5238 // the option, arrays still join to a space-separated scalar
5239 // (zsh's default unquoted-array-as-scalar semantics).
5240 let rc_expand =
5241 with_executor(|exec| crate::ported::options::opt_state_get("rcexpandparam").unwrap_or(false));
5242 if rc_expand {
5243 let arr_val = with_executor(|exec| {
5244 sync_status(exec);
5245 exec.array(&name)
5246 });
5247 if let Some(arr) = arr_val {
5248 return fusevm::Value::Array(arr.into_iter().map(fusevm::Value::str).collect());
5249 }
5250 }
5251 // Magic-assoc fallback FIRST — `${aliases}` / `${functions}`
5252 // / `${commands}` / etc. should return the value list per
5253 // zsh's bare-assoc semantics. Without this, those names fell
5254 // through to `get_variable` which is empty (they live in
5255 // separate executor tables, not `assoc_arrays`). Return as
5256 // a Value::Array so `arr=(${aliases})` distributes into
5257 // multiple elements, matching zsh's array-context word
5258 // splitting for assoc-bare references.
5259 let magic_vals = with_executor(|exec| {
5260 sync_status(exec);
5261 crate::exec::scan_magic_assoc_keys(&name).map(|keys| {
5262 keys.iter()
5263 .map(|k| exec.get_special_array_value(&name, k).unwrap_or_default())
5264 .collect::<Vec<_>>()
5265 })
5266 });
5267 if let Some(vals) = magic_vals {
5268 // Distinguish "name IS a magic-assoc with no entries"
5269 // (return Array(empty)) from "name is unknown — fall
5270 // through to get_variable".
5271 return fusevm::Value::Array(vals.into_iter().map(fusevm::Value::str).collect());
5272 }
5273 // Indexed-array path: return Value::Array so pop_args splats
5274 // each element into its own argv slot. Direct port of zsh's
5275 // unquoted `$arr` semantics — each element becomes a separate
5276 // word in command-arg position.
5277 //
5278 // DQ context exception: inside `"...$arr..."`, zsh joins with
5279 // the first char of $IFS (default space) so the DQ word stays
5280 // a single argv slot. Detect via in_dq_context (bumped by
5281 // BUILTIN_EXPAND_TEXT mode 1) and return the joined scalar.
5282 // Direct port of Src/subst.c:1759-1813 nojoin/sepjoin: in DQ
5283 // (qt=1) without explicit `(@)`, sepjoin runs and the result
5284 // is one word.
5285 let arr_assoc_data = with_executor(|exec| {
5286 sync_status(exec);
5287 let in_dq = exec.in_dq_context > 0;
5288 // KSH_ARRAYS: bare `$arr` returns ONLY arr[0] (zero-
5289 // based first-element-only semantics). Direct port of
5290 // Src/params.c getstrvalue's KSH_ARRAYS gate which
5291 // returns aval[0] instead of the whole array.
5292 let ksh_arrays = crate::ported::options::opt_state_get("ksharrays").unwrap_or(false);
5293 if let Some(arr) = exec.array(&name) {
5294 if ksh_arrays {
5295 return Some((vec![arr.first().cloned().unwrap_or_default()], in_dq));
5296 }
5297 return Some((arr.clone(), in_dq));
5298 }
5299 if let Some(map) = exec.assoc(&name) {
5300 let mut keys: Vec<&String> = map.keys().collect();
5301 keys.sort();
5302 let values: Vec<String> = keys
5303 .iter()
5304 .filter_map(|k| map.get(*k).cloned())
5305 .collect();
5306 if ksh_arrays {
5307 return Some((vec![values.into_iter().next().unwrap_or_default()], in_dq));
5308 }
5309 return Some((values, in_dq));
5310 }
5311 None
5312 });
5313 if let Some((items, in_dq)) = arr_assoc_data {
5314 if in_dq {
5315 let sep = with_executor(|exec| {
5316 exec.get_variable("IFS")
5317 .chars()
5318 .next()
5319 .map(|c| c.to_string())
5320 .unwrap_or_else(|| " ".to_string())
5321 });
5322 return Value::str(items.join(&sep));
5323 }
5324 return fusevm::Value::Array(items.into_iter().map(fusevm::Value::str).collect());
5325 }
5326 let (val, in_dq) = with_executor(|exec| {
5327 sync_status(exec);
5328 (exec.get_variable(&name), exec.in_dq_context > 0)
5329 });
5330 // Empty unquoted scalar → drop the arg (zsh "remove empty
5331 // unquoted words" rule). Returning empty Value::Array makes
5332 // pop_args contribute zero items. DQ context keeps the empty
5333 // string so "$a" stays a single empty arg. Direct port of
5334 // subst.c's elide-empty pass.
5335 if val.is_empty() && !in_dq {
5336 return fusevm::Value::Array(Vec::new());
5337 }
5338 Value::str(val)
5339 });
5340
5341 // `name+=val` (no parens) — runtime dispatch:
5342 // - if `name` is in `arrays` → push `val` as new element
5343 // - if `name` is in `assoc_arrays` → refuse (zsh errors here)
5344 // - else → scalar concat (existing behavior)
5345 // Stack: [name, value].
5346 vm.register_builtin(BUILTIN_APPEND_SCALAR_OR_PUSH, |vm, argc| {
5347 let args = pop_args(vm, argc);
5348 let mut iter = args.into_iter();
5349 let name = iter.next().unwrap_or_default();
5350 let value = iter.next().unwrap_or_default();
5351 with_executor(|exec| {
5352 if let Some(mut arr) = exec.array(&name) {
5353 arr.push(value.clone());
5354 exec.set_array(name.clone(), arr);
5355 // PFA-SMR aspect: `name+=elem` array push (scalar form
5356 // resolved to existing indexed array). is_append=true.
5357 #[cfg(feature = "recorder")]
5358 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
5359 let ctx = exec.recorder_ctx();
5360 let attrs = exec.recorder_attrs_for(&name);
5361 emit_path_or_assign(&name, std::slice::from_ref(&value), attrs, true, &ctx);
5362 }
5363 return;
5364 }
5365 if exec.assoc(&name).is_some() {
5366 eprintln!("zshrs: {}: cannot use += on assoc without (key val)", name);
5367 return;
5368 }
5369 // typeset -i: `+=` is arithmetic add, not string concat.
5370 // `typeset -i x=42; x+=8` must store 50, not "428". Per
5371 // Src/params.c assignsparam:3270-3293, the PM_TYPE switch
5372 // routes integer/float through matheval. Read PM_INTEGER
5373 // from the canonical Param flags.
5374 let is_integer = exec.is_integer_param(&name);
5375 if is_integer {
5376 let prev = exec.get_variable(&name);
5377 let prev_n: i64 = prev.parse().unwrap_or(0);
5378 let added = crate::ported::math::mathevali(&crate::ported::subst::singsub(&value)).unwrap_or(0);
5379 let new_val = (prev_n + added).to_string();
5380 exec.set_scalar(name.clone(), new_val.clone());
5381 // PFA-SMR aspect: integer-typed append. The append
5382 // operator is arithmetic; replay should restore the
5383 // POST-add value so the bundle reflects end state.
5384 #[cfg(feature = "recorder")]
5385 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
5386 let ctx = exec.recorder_ctx();
5387 let attrs = exec.recorder_attrs_for(&name);
5388 crate::recorder::emit_assign_typed(&name, &new_val, attrs, ctx);
5389 }
5390 return;
5391 }
5392 // Scalar concat.
5393 let prev = exec.get_variable(&name);
5394 let combined = format!("{}{}", prev, value);
5395 exec.set_scalar(name.clone(), combined.clone());
5396 // PFA-SMR aspect: scalar concat (`PATH+=":/foo"` and any
5397 // other `NAME+=tail` shape). For PATH-family scalars the
5398 // path-or-assign helper still emits a path_mod with the
5399 // FULL post-concat value so replay knows the end state.
5400 #[cfg(feature = "recorder")]
5401 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
5402 let ctx = exec.recorder_ctx();
5403 let attrs = exec.recorder_attrs_for(&name);
5404 let lower = name.to_ascii_lowercase();
5405 if matches!(
5406 lower.as_str(),
5407 "path" | "fpath" | "manpath" | "module_path" | "cdpath"
5408 ) {
5409 emit_path_or_assign(
5410 &name,
5411 std::slice::from_ref(&combined),
5412 attrs,
5413 true,
5414 &ctx,
5415 );
5416 } else {
5417 crate::recorder::emit_assign_typed(&name, &combined, attrs, ctx);
5418 }
5419 }
5420 });
5421 Value::Status(0)
5422 });
5423
5424 vm.register_builtin(BUILTIN_SET_VAR, |vm, argc| {
5425 let args = pop_args(vm, argc);
5426 let mut iter = args.into_iter();
5427 let name = iter.next().unwrap_or_default();
5428 let value = iter.next().unwrap_or_default();
5429 let blocked = with_executor(|exec| {
5430 // zsh has a fixed set of intrinsic read-only specials that
5431 // can never be assigned to from script. This is a hard
5432 // wired list (params.c `ROVAR` flag) — not user-settable.
5433 // NOTE: `_` is NOT readonly — zsh allows assignments to
5434 // and `unset` of it (it's just the last-arg auto-update).
5435 // ZSH_ARGZERO is also writable in zsh per Src/params.c
5436 // (uses PM_SCALAR without PM_READONLY); zinit's startup
5437 // line `ZSH_ARGZERO=$0` relies on this.
5438 let is_intrinsic_ro = matches!(
5439 name.as_str(),
5440 "PPID" | "LINENO" | "argv0" | "ARGC"
5441 );
5442 let is_ro = is_intrinsic_ro || exec.is_readonly_param(&name);
5443 if is_ro {
5444 eprintln!("zshrs:1: read-only variable: {}", name);
5445 // Mirror zsh -c: read-only assignment failure aborts
5446 // the shell with status 1, not just the command.
5447 std::process::exit(1);
5448 }
5449 // If the variable was previously declared `integer` (or
5450 // `typeset -i`), arith-evaluate the value before storing.
5451 // zsh: `integer i; i=5*3` stores 15. Mirrors C's PM_TYPE
5452 // dispatch at Src/params.c assignsparam:3270.
5453 let is_integer = exec.is_integer_param(&name);
5454 // `typeset -i N` base-formatting reads `Param.base` directly
5455 // (Src/zsh.h:1860 — int print base). Per C convfloat /
5456 // convbase in params.c, base==0 means default decimal.
5457 let int_base: Option<u32> = if is_integer {
5458 let b = crate::ported::params::paramtab().read().ok()
5459 .and_then(|t| t.get(&name).map(|pm| pm.base))
5460 .unwrap_or(0);
5461 if b > 0 { Some(b as u32) } else { None }
5462 } else {
5463 None
5464 };
5465 let stored = if is_integer && !value.is_empty() {
5466 let evaluated = crate::ported::math::mathevali(&crate::ported::subst::singsub(&value)).unwrap_or(0).to_string();
5467 if let Some(base) = int_base {
5468 evaluated
5469 .parse::<i64>()
5470 .map(|n| format_int_in_base(n, base))
5471 .unwrap_or(evaluated)
5472 } else {
5473 evaluated
5474 }
5475 } else {
5476 value.clone()
5477 };
5478 // c:Src/params.c — `typeset -l` (PM_LOWER) / `-u`
5479 // (PM_UPPER) case-fold the assigned value before storage.
5480 // Direct port of the PM_LOWER/PM_UPPER setstrvalue arms.
5481 let stored = if exec.is_uppercase_param(&name) {
5482 stored.to_uppercase()
5483 } else if exec.is_lowercase_param(&name) {
5484 stored.to_lowercase()
5485 } else {
5486 stored
5487 };
5488 // Mirror scalar→array if name is the scalar side of a
5489 // typeset -T tie. Direct port of Src/params.c PM_TIED:
5490 // assigning to PATH must update both `path` (the array
5491 // mirror) and the process env (so child execs see the
5492 // new value, and so find_in_path / external lookups
5493 // resolve correctly). Without the env::set_var step
5494 // here, `PATH=/nope; ls` continued to find ls via the
5495 // shell's startup-time env PATH.
5496 if let Some((arr_name, sep)) = exec.tied_scalar_to_array.get(&name).cloned() {
5497 let parts: Vec<String> = if stored.is_empty() {
5498 Vec::new()
5499 } else {
5500 stored.split(&sep).map(String::from).collect()
5501 };
5502 exec.set_array(arr_name, parts);
5503 std::env::set_var(&name, &stored);
5504 // Clear the command hash on PATH change so subsequent
5505 // command lookups walk the new PATH instead of
5506 // returning stale absolute paths from before the
5507 // assignment. zsh's bin_set rehashes lazily; this is
5508 // the simplest equivalent.
5509 if name == "PATH" {
5510 if let Ok(mut t) = crate::ported::hashtable::cmdnamtab_lock().write() {
5511 t.clear();
5512 }
5513 }
5514 let _ = exec; // silence unused-binding in the no-PATH branch
5515 }
5516 // zsh enforces a minimum of 1 on `HISTSIZE` — `HISTSIZE=0`
5517 // and `HISTSIZE=-5` both clamp to `1`. Mirror at storage
5518 // time so subsequent reads return the clamped value.
5519 let stored = if name == "HISTSIZE" {
5520 stored
5521 .parse::<i64>()
5522 .map(|n| n.max(1).to_string())
5523 .unwrap_or_else(|_| stored.clone())
5524 } else {
5525 stored
5526 };
5527 // If we're inside an inline-assignment frame (`X=foo cmd`
5528 // is currently exec'ing the prefix), record the previous
5529 // value so END_INLINE_ENV can restore it after the command
5530 // returns. Then export the new value to the env so the
5531 // child sees it. zsh's `X=foo cmd` semantics: shell
5532 // variable AND env entry both vanish after cmd returns.
5533 let in_inline_env = !exec.inline_env_stack.is_empty();
5534 if in_inline_env {
5535 let prev_var = crate::ported::params::getsparam(&name);
5536 let prev_env = std::env::var(&name).ok();
5537 exec.inline_env_stack
5538 .last_mut()
5539 .unwrap()
5540 .push((name.clone(), prev_var, prev_env));
5541 std::env::set_var(&name, &stored);
5542 }
5543 exec.set_scalar(name.clone(), stored.clone());
5544 // Mirror the write into paramtab (the C-port canonical
5545 // store at `Src/params.c:3350 setsparam`). Without this,
5546 // `src/ported/subst.rs::vars_get` and
5547 // `src/ported/params.rs::getsparam` see paramtab-only and
5548 // miss script-level `x=hello` assignments — heredoc body
5549 // substitution, `${x}` inside `singsub`, and any other
5550 // C-port reader that doesn't go through fusevm's typed-
5551 // variable path returns empty. paramtab IS the C-source
5552 // canonical scalar store; this mirror keeps it coherent
5553 // with the parallel `exec.variables` HashMap.
5554 crate::ported::params::setsparam(&name, &stored); // c:params.c:3350
5555 // `set -o allexport`: every assignment auto-exports the var.
5556 // zsh: `setopt allexport; a=42; env | grep ^a=` prints `a=42`.
5557 // Without this, env didn't see user-set scalars.
5558 let allexport = crate::ported::options::opt_state_get("allexport").unwrap_or(false);
5559 let already_exported = (exec.param_flags(&name) as u32 & crate::ported::zsh_h::PM_EXPORTED) != 0;
5560 if allexport || already_exported {
5561 std::env::set_var(&name, &stored);
5562 }
5563 // PFA-SMR aspect: every top-level scalar assignment
5564 // (`VAR=value`) compiles to BUILTIN_SET_VAR, so this is the
5565 // chokepoint. Skip the recorder when inside a function scope
5566 // (those are runtime locals, not config state) and skip the
5567 // intrinsic specials zsh maintains itself.
5568 #[cfg(feature = "recorder")]
5569 if crate::recorder::is_enabled()
5570 && exec.local_scope_depth == 0
5571 && !matches!(
5572 name.as_str(),
5573 "PPID" | "LINENO" | "ZSH_ARGZERO" | "argv0" | "ARGC" | "?" | "_" | "RANDOM"
5574 )
5575 {
5576 let ctx = exec.recorder_ctx();
5577 let attrs = exec.recorder_attrs_for(&name);
5578 crate::recorder::emit_assign_typed(&name, &stored, attrs, ctx);
5579 }
5580 false
5581 });
5582 if blocked {
5583 return Value::Status(1);
5584 }
5585 // Propagate cmd-subst's exit status to $?. zsh: `a=$(false);
5586 // echo $?` → 1. run_command_substitution sets last_status
5587 // before returning; we pick it up here so the assignment's
5588 // status reflects the cmd-subst result.
5589 //
5590 // CRITICAL: read `vm.last_status` (live), NOT
5591 // `exec.last_status` (stale — only synced at statement
5592 // boundaries; see the BUILTIN_RETURN handler ~line 1003).
5593 // compile_assign emits LoadInt(0) + SetStatus BEFORE the
5594 // RHS is evaluated specifically to clear the live status,
5595 // so a plain assignment (no cmd-subst) reads back 0 and a
5596 // `$(...)` value reads back the subst's exit. Reading the
5597 // stale exec field here would always propagate the previous
5598 // command's status, breaking `false; a=plain; echo $?` → 1
5599 // (should be 0).
5600 let captured = vm.last_status;
5601 Value::Status(captured)
5602 });
5603
5604 // BUILTIN_REGISTER_FUNCTION (id 282) was a legacy JSON-AST body
5605 // bridge. ZshCompiler emits BUILTIN_REGISTER_COMPILED_FN (id 305)
5606 // instead, which carries a base64 bincode of an already-compiled
5607 // Chunk. The constant + handler are removed; the ID stays reserved.
5608
5609 // Pre-compiled function registration — used by compile_zsh.rs's
5610 // FuncDef path. Stack: [name, base64-bincode-of-Chunk]. We decode
5611 // the base64, deserialize the Chunk, and store directly in
5612 // executor.functions_compiled. Bypasses the ShellCommand JSON layer.
5613 // `[[ -v name ]]` — true iff `name` is a set variable (incl. set-empty,
5614 // arrays, assoc arrays, and exported env vars). Pops one string, pushes
5615 // Bool. Matches bash's -v semantics; zsh's `(t)` flag overlaps.
5616 vm.register_builtin(BUILTIN_VAR_EXISTS, |vm, _argc| {
5617 let name = vm.pop().to_str();
5618 // `[[ -v a[N] ]]` checks element existence, not just the array.
5619 // Split on `[`, look up the array, and verify the resolved
5620 // index falls within the populated range. `[[ -v h[key] ]]`
5621 // checks an associative array key.
5622 if let Some(open) = name.find('[') {
5623 if name.ends_with(']') {
5624 let arr_name = &name[..open];
5625 let key = &name[open + 1..name.len() - 1];
5626 let exists = with_executor(|exec| {
5627 if let Some(arr) = exec.array(arr_name) {
5628 // 1-based index, supports negatives.
5629 let parsed = key.parse::<i64>().ok();
5630 if let Some(i) = parsed {
5631 let len = arr.len() as i64;
5632 let resolved = if i < 0 { len + i + 1 } else { i };
5633 return resolved >= 1 && resolved <= len;
5634 }
5635 return false;
5636 }
5637 if let Some(h) = exec.assoc(arr_name) {
5638 return h.contains_key(key);
5639 }
5640 false
5641 });
5642 return fusevm::Value::Bool(exists);
5643 }
5644 }
5645 let exists = with_executor(|exec| {
5646 // Positional parameter test: `[[ -v N ]]` for an integer N
5647 // checks whether `$N` is set — i.e. there are at least N
5648 // positional params. The digit name otherwise won't exist
5649 // in `variables` unless explicitly assigned.
5650 if !name.is_empty() && name.chars().all(|c| c.is_ascii_digit()) {
5651 if let Ok(n) = name.parse::<usize>() {
5652 if n == 0 {
5653 return exec.has_scalar("0");
5654 }
5655 return n <= exec.pparams().len();
5656 }
5657 }
5658 exec.has_scalar(&name)
5659 || exec.array(&name).is_some()
5660 || exec.assoc(&name).is_some()
5661 || std::env::var(&name).is_ok()
5662 });
5663 fusevm::Value::Bool(exists)
5664 });
5665
5666 // `time { compound; ... }` — runs the sub-chunk and prints elapsed
5667 // wall-clock time. zsh's full `time` also tracks user/system CPU via
5668 // getrusage on the *child*; we approximate via wall-time only since
5669 // the sub-chunk runs in-process (no fork). Output format matches
5670 // `time simple-cmd` (already implemented elsewhere via exectime).
5671 vm.register_builtin(BUILTIN_TIME_SUBLIST, |vm, _argc| {
5672 let sub_idx = vm.pop().to_int() as usize;
5673 let chunk_opt = vm.chunk.sub_chunks.get(sub_idx).cloned();
5674 let Some(chunk) = chunk_opt else {
5675 return Value::Status(0);
5676 };
5677 let start = Instant::now();
5678 let mut sub_vm = fusevm::VM::new(chunk);
5679 register_builtins(&mut sub_vm);
5680 let _ = sub_vm.run();
5681 let status = sub_vm.last_status;
5682 let elapsed = start.elapsed();
5683 eprintln!(
5684 "{:.2}s user {:.2}s system {:.0}% cpu {:.3} total",
5685 elapsed.as_secs_f64() * 0.7,
5686 elapsed.as_secs_f64() * 0.1,
5687 ((elapsed.as_secs_f64() * 0.8) / elapsed.as_secs_f64() * 100.0).min(100.0),
5688 elapsed.as_secs_f64()
5689 );
5690 Value::Status(status)
5691 });
5692
5693 // `{name}>file` / `{name}<file` / `{name}>>file` — named-fd allocator.
5694 // Stack: [path, varid, op_byte]. Opens path with the appropriate mode
5695 // and stores the resulting fd number in $varid as a string. We use
5696 // a high starting fd (10+) by allocating then dup'ing — matches zsh's
5697 // "fresh fd >= 10" promise so subsequent commands don't collide on
5698 // stdin/out/err.
5699 vm.register_builtin(BUILTIN_OPEN_NAMED_FD, |vm, _argc| {
5700 let op_byte = vm.pop().to_int() as u8;
5701 let varid = vm.pop().to_str();
5702 let path = vm.pop().to_str();
5703 let path_c = match std::ffi::CString::new(path.clone()) {
5704 Ok(c) => c,
5705 Err(_) => return Value::Status(1),
5706 };
5707 let flags = match op_byte {
5708 b if b == fusevm::op::redirect_op::READ => libc::O_RDONLY,
5709 b if b == fusevm::op::redirect_op::WRITE || b == fusevm::op::redirect_op::CLOBBER => {
5710 libc::O_WRONLY | libc::O_CREAT | libc::O_TRUNC
5711 }
5712 b if b == fusevm::op::redirect_op::APPEND => {
5713 libc::O_WRONLY | libc::O_CREAT | libc::O_APPEND
5714 }
5715 b if b == fusevm::op::redirect_op::READ_WRITE => libc::O_RDWR | libc::O_CREAT,
5716 _ => return Value::Status(1),
5717 };
5718 let fd = unsafe { libc::open(path_c.as_ptr(), flags, 0o644) };
5719 if fd < 0 {
5720 return Value::Status(1);
5721 }
5722 // Re-dup to fd >= 10 so positional fds (0/1/2/etc.) stay free.
5723 let new_fd = unsafe { libc::fcntl(fd, libc::F_DUPFD_CLOEXEC, 10) };
5724 let final_fd = if new_fd >= 10 {
5725 unsafe { libc::close(fd) };
5726 new_fd
5727 } else {
5728 fd
5729 };
5730 with_executor(|exec| {
5731 exec.set_scalar(varid, final_fd.to_string());
5732 });
5733 Value::Status(0)
5734 });
5735
5736 // BUILTIN_SET_TRY_BLOCK_ERROR — capture the try-block's exit status
5737 // into $TRY_BLOCK_ERROR so the always-arm can read it.
5738 vm.register_builtin(BUILTIN_SET_TRY_BLOCK_ERROR, |vm, _argc| {
5739 let vm_status = vm.last_status;
5740 with_executor(|exec| {
5741 exec.set_scalar("TRY_BLOCK_ERROR".to_string(), vm_status.to_string());
5742 });
5743 fusevm::Value::Status(0)
5744 });
5745
5746 // BUILTIN_BEGIN_INLINE_ENV / END_INLINE_ENV — wrap an
5747 // inline-assignment-prefixed command (`X=foo Y=bar cmd`):
5748 // BEGIN pushes a save frame; SET_VAR fires for each assign and
5749 // ALSO env::set_var's the value (visible to cmd's child); the
5750 // command runs; END pops the frame and restores both shell-var
5751 // and process-env state. Direct port of zsh's addvars() →
5752 // execute_simple → restore-after-exec contract.
5753 vm.register_builtin(BUILTIN_BEGIN_INLINE_ENV, |_vm, _argc| {
5754 with_executor(|exec| {
5755 exec.inline_env_stack.push(Vec::new());
5756 });
5757 fusevm::Value::Status(0)
5758 });
5759 vm.register_builtin(BUILTIN_END_INLINE_ENV, |_vm, _argc| {
5760 with_executor(|exec| {
5761 if let Some(frame) = exec.inline_env_stack.pop() {
5762 for (name, prev_var, prev_env) in frame.into_iter().rev() {
5763 match prev_var {
5764 Some(v) => {
5765 exec.set_scalar(name.clone(), v);
5766 }
5767 None => {
5768 exec.unset_scalar(&name);
5769 }
5770 }
5771 match prev_env {
5772 Some(v) => std::env::set_var(&name, &v),
5773 None => std::env::remove_var(&name),
5774 }
5775 }
5776 }
5777 });
5778 fusevm::Value::Status(0)
5779 });
5780
5781 // BUILTIN_RESTORE_TRY_BLOCK_STATUS — emitted at the end of an
5782 // `always` arm. Per zshmisc, the exit status of the entire
5783 // `{ try } always { finally }` construct is the try-list's
5784 // status, regardless of what happens in the always-list (the
5785 // exception is `return`/`exit` inside always, which short-
5786 // circuits and the cleanup is the only thing that runs). So
5787 // restore TRY_BLOCK_ERROR unconditionally — the always-list's
5788 // exit status is discarded for the construct.
5789 vm.register_builtin(BUILTIN_RESTORE_TRY_BLOCK_STATUS, |_vm, _argc| {
5790 let try_status = with_executor(|exec| {
5791 exec.scalar("TRY_BLOCK_ERROR")
5792 .and_then(|s| s.parse::<i32>().ok())
5793 .unwrap_or(0)
5794 });
5795 fusevm::Value::Status(try_status)
5796 });
5797
5798 vm.register_builtin(BUILTIN_UNKNOWN_COND, |vm, _argc| {
5799 // Unused — the diagnostic is emitted at compile time
5800 // (BUILTIN dispatch wasn't reliably firing for this path).
5801 // Kept registered as a no-op placeholder.
5802 let _ = vm.pop();
5803 fusevm::Value::Bool(false)
5804 });
5805
5806 vm.register_builtin(BUILTIN_IS_TTY, |vm, _argc| {
5807 let fd_str = vm.pop().to_str();
5808 let fd: i32 = fd_str.trim().parse().unwrap_or(-1);
5809 let is_tty = if fd < 0 {
5810 false
5811 } else {
5812 unsafe { libc::isatty(fd) != 0 }
5813 };
5814 fusevm::Value::Bool(is_tty)
5815 });
5816
5817 // Set $LINENO before executing the next statement. Direct
5818 // port of zsh's `lineno` global tracking from Src/input.c
5819 // (`if ((inbufflags & INP_LINENO) || !strin) && c == '\n')
5820 // lineno++;`). The compiler emits one of these before each
5821 // top-level pipe in `compile_sublist`, carrying the line
5822 // number captured by the parser at `ZshPipe.lineno`. Pops
5823 // [n], updates `$LINENO` in the variable table.
5824 vm.register_builtin(BUILTIN_SET_LINENO, |vm, _argc| {
5825 let n = vm.pop().to_int();
5826 with_executor(|exec| {
5827 exec.set_scalar("LINENO".to_string(), n.to_string());
5828 });
5829 // Mirror to the file-static `lineno` (utils.c:121) that
5830 // zerrmsg reads at utils.c:301 for the `:N: msg` prefix.
5831 crate::ported::utils::set_lineno(n as i32);
5832 fusevm::Value::Status(0)
5833 });
5834
5835 // Direct port of Src/prompt.c:1623 cmdpush. Token is a `CS_*`
5836 // value (zsh.h:2775-2806) emitted by compile_zsh around each
5837 // compound command (if/while/[[…]]/((…))/$(…)) and consumed by
5838 // `%_` in PS4 / prompt expansion.
5839 vm.register_builtin(BUILTIN_CMD_PUSH, |vm, _argc| {
5840 let token = vm.pop().to_int() as u8;
5841 // Route through canonical cmdpush (Src/prompt.c:1623). The
5842 // prompt expander reads from the file-static `CMDSTACK` at
5843 // `prompt.rs:2006`, not `exec.cmd_stack` — without this,
5844 // `%_` in PS4 saw an empty stack during xtrace.
5845 if (token as i32) < crate::ported::zsh_h::CS_COUNT {
5846 crate::ported::prompt::cmdpush(token);
5847 }
5848 // Canonical `cmdpush()` above already mirrors into the
5849 // `prompt::CMDSTACK` thread_local (Src/prompt.c:1620). The
5850 // legacy `exec.cmd_stack` mirror is gone.
5851 let _ = token;
5852 fusevm::Value::Status(0)
5853 });
5854
5855 // Direct port of Src/prompt.c:1631 cmdpop.
5856 vm.register_builtin(BUILTIN_CMD_POP, |_vm, _argc| {
5857 crate::ported::prompt::cmdpop();
5858 fusevm::Value::Status(0)
5859 });
5860
5861 vm.register_builtin(BUILTIN_OPTION_SET, |vm, _argc| {
5862 let name = vm.pop().to_str();
5863 // Direct port of `optison(char *name, char *s)` at Src/cond.c:502 — `[[ -o NAME ]]`
5864 // reads through the same `opts[]` array that `setopt NAME`
5865 // writes via `dosetopt`. Earlier code read a duplicate Executor
5866 // HashMap which never saw `bin_setopt`'s writes (those land in
5867 // `OPTS_LIVE` via `opt_state_set`). Routing through the canonical
5868 // C port restores the single-store invariant: one `opts[]`,
5869 // shared between setopt/unsetopt and `[[ -o ]]`.
5870 let r = crate::ported::cond::optison("test", &name); // c:cond.c:502
5871 match r {
5872 0 => fusevm::Value::Bool(true), // c:cond.c:520 set
5873 1 => fusevm::Value::Bool(false), // c:cond.c:518/520 unset
5874 _ => {
5875 // c:cond.c:514 — unknown option: zwarnnam emitted by
5876 // optison itself when POSIXBUILTINS is unset; mirror to
5877 // stderr here for parity with the earlier diagnostic.
5878 eprintln!("zshrs:1: no such option: {}", name);
5879 fusevm::Value::Bool(false)
5880 }
5881 }
5882 });
5883
5884 vm.register_builtin(BUILTIN_PARAM_FILTER, |vm, _argc| {
5885 let pattern_raw = vm.pop().to_str();
5886 let name = vm.pop().to_str();
5887 // Expand `$VAR` / `${VAR}` / `$(cmd)` / `$((expr))` references in
5888 // the pattern before matching. Direct port of Src/subst.c:3192
5889 // case '#' arm which calls singsub on the operand. zinit's
5890 // `${(@)region_highlight:#$_LAST_HIGHLIGHT}` and similar idioms
5891 // rely on the pattern being expanded first.
5892 let pattern = if pattern_raw.contains('$') || pattern_raw.contains('`') {
5893 crate::ported::subst::singsub(&pattern_raw)
5894 } else {
5895 pattern_raw
5896 };
5897 let arr_val = with_executor(|exec| exec.array(&name));
5898 // Inline of the deleted extendedglob_match helper (Src/glob.c
5899 // pattern_match path): leading `^` inverts when extendedglob is
5900 // set; otherwise falls through to glob_match_static. Plain
5901 // literal-equal path retained for the no-meta-char case
5902 // (cheaper than running a regex compile on every element).
5903 let matches_glob = |s: &str, pat: &str| -> bool {
5904 let starts_neg = pat.starts_with('^');
5905 if pat.contains('*') || pat.contains('?') || pat.contains('[') || starts_neg {
5906 let extendedglob = with_executor(|exec| {
5907 crate::ported::options::opt_state_get("extendedglob").unwrap_or(false)
5908 });
5909 if extendedglob {
5910 if let Some(neg) = pat.strip_prefix('^') {
5911 return !crate::exec::glob_match_static(s, neg);
5912 }
5913 }
5914 crate::exec::glob_match_static(s, pat)
5915 } else {
5916 s == pat
5917 }
5918 };
5919 // (M) flag inverts the filter: keep matching elements, drop
5920 // non-matching (vs default which drops matches). Direct port
5921 // of subst.c's SUB_MATCH bit which getmatch consults to
5922 // pick the "matched" disposition over the "rest" default.
5923 let invert = {
5924 let sf = crate::ported::subst::sub_flags_get(); // c:2171
5925 let inv = (sf & 0x0008) != 0; // c:2171 SUB_MATCH
5926 crate::ported::subst::sub_flags_set(0); // c:2169 (consume)
5927 inv
5928 };
5929 if let Some(arr) = arr_val {
5930 let kept: Vec<fusevm::Value> = arr
5931 .into_iter()
5932 .filter(|elem| { // c:2171
5933 let m = matches_glob(elem, &pattern); // c:2171
5934 if invert { m } else { !m } // c:2171
5935 })
5936 .map(fusevm::Value::str)
5937 .collect();
5938 return fusevm::Value::Array(kept);
5939 }
5940 let val = with_executor(|exec| exec.get_variable(&name));
5941 let m = matches_glob(&val, &pattern);
5942 if invert { // c:2171
5943 if m { fusevm::Value::str(val) } else { fusevm::Value::str(String::new()) } // c:2171
5944 } else if m {
5945 fusevm::Value::str(String::new())
5946 } else {
5947 fusevm::Value::str(val)
5948 }
5949 });
5950
5951 // `a[i]=(elements)` / `a[i,j]=(elements)` / `a[i]=()`
5952 // — subscripted-array assign with array RHS. Stack pushed by
5953 // compile_assign as: [elem0, elem1, …, elemN-1, name, key].
5954 vm.register_builtin(BUILTIN_SET_SUBSCRIPT_RANGE, |vm, argc| {
5955 let n = argc as usize;
5956 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(n);
5957 for _ in 0..n {
5958 popped.push(vm.pop());
5959 }
5960 popped.reverse();
5961 if popped.len() < 2 {
5962 return fusevm::Value::Status(1);
5963 }
5964 let key = popped.pop().unwrap().to_str();
5965 let name = popped.pop().unwrap().to_str();
5966 let mut values: Vec<String> = Vec::new();
5967 for v in popped {
5968 match v {
5969 fusevm::Value::Array(items) => {
5970 for it in items {
5971 values.push(it.to_str());
5972 }
5973 }
5974 other => values.push(other.to_str()),
5975 }
5976 }
5977 with_executor(|exec| {
5978 // Read paramtab-first, mutate, write back via canonical
5979 // set_array so subscript-slice/index assignments are
5980 // visible to both the paramtab single source and the
5981 // legacy cache.
5982 let mut arr = exec.array(&name).unwrap_or_default();
5983 // Slice form `a[i,j]=(values)` — replace the inclusive
5984 // slice. Negative bounds count from end. Out-of-range high
5985 // bound clamps to len; low bound below 1 clamps to 1.
5986 if let Some((s_str, e_str)) = key.split_once(',') {
5987 let len = arr.len() as i64;
5988 let resolve = |s: &str| -> i64 { s.trim().parse::<i64>().unwrap_or_default() };
5989 let s_raw = resolve(s_str);
5990 let e_raw = resolve(e_str);
5991 let lo = if s_raw < 0 {
5992 (len + s_raw + 1).max(1)
5993 } else {
5994 s_raw.max(1)
5995 };
5996 let hi = if e_raw < 0 {
5997 (len + e_raw + 1).max(0)
5998 } else {
5999 e_raw.max(0)
6000 };
6001 let lo_idx = (lo - 1) as usize;
6002 let hi_idx = ((hi as usize).min(arr.len())).max(lo_idx);
6003 let _: Vec<String> = arr.splice(lo_idx..hi_idx, values).collect();
6004 exec.set_array(name, arr);
6005 return;
6006 }
6007 // Single-int key. `a[i]=()` (empty values) removes the
6008 // element at that index. Otherwise treat as a multi-element
6009 // splice starting at i.
6010 let i: i64 = match key.trim().parse::<i64>() {
6011 Ok(n) => n,
6012 Err(_) => return,
6013 };
6014 let len = arr.len() as i64;
6015 let idx = if i > 0 {
6016 (i - 1) as usize
6017 } else if i < 0 {
6018 let off = len + i;
6019 if off < 0 {
6020 return;
6021 }
6022 off as usize
6023 } else {
6024 return;
6025 };
6026 if values.is_empty() {
6027 if idx < arr.len() {
6028 arr.remove(idx);
6029 }
6030 } else {
6031 let end = (idx + 1).min(arr.len());
6032 let _: Vec<String> = arr.splice(idx..end, values).collect();
6033 }
6034 exec.set_array(name, arr);
6035 });
6036 fusevm::Value::Status(0)
6037 });
6038
6039 // BUILTIN_CONCAT_SPLICE — word-segment concat with first/last
6040 // sticking (default zsh splice semantics for `${arr[@]}`, `$@`).
6041 vm.register_builtin(BUILTIN_CONCAT_SPLICE, |vm, _argc| {
6042 let rhs = vm.pop();
6043 let lhs = vm.pop();
6044 match (lhs, rhs) {
6045 (fusevm::Value::Array(mut la), fusevm::Value::Array(ra)) => {
6046 if la.is_empty() {
6047 return fusevm::Value::Array(ra);
6048 }
6049 if ra.is_empty() {
6050 return fusevm::Value::Array(la);
6051 }
6052 // Last of la merges with first of ra; rest unchanged.
6053 let last_l = la.pop().unwrap();
6054 let mut ra_iter = ra.into_iter();
6055 let first_r = ra_iter.next().unwrap();
6056 let l_s = last_l.as_str_cow();
6057 let r_s = first_r.as_str_cow();
6058 let mut merged = String::with_capacity(l_s.len() + r_s.len());
6059 merged.push_str(&l_s);
6060 merged.push_str(&r_s);
6061 la.push(fusevm::Value::str(merged));
6062 la.extend(ra_iter);
6063 fusevm::Value::Array(la)
6064 }
6065 (fusevm::Value::Array(mut la), rhs_scalar) => {
6066 if la.is_empty() {
6067 return fusevm::Value::str(rhs_scalar.as_str_cow().to_string());
6068 }
6069 let last = la.pop().unwrap();
6070 let l_s = last.as_str_cow();
6071 let r_s = rhs_scalar.as_str_cow();
6072 let mut s = String::with_capacity(l_s.len() + r_s.len());
6073 s.push_str(&l_s);
6074 s.push_str(&r_s);
6075 la.push(fusevm::Value::str(s));
6076 fusevm::Value::Array(la)
6077 }
6078 (lhs_scalar, fusevm::Value::Array(mut ra)) => {
6079 if ra.is_empty() {
6080 return fusevm::Value::str(lhs_scalar.as_str_cow().to_string());
6081 }
6082 let first = ra.remove(0);
6083 let l_s = lhs_scalar.as_str_cow();
6084 let r_s = first.as_str_cow();
6085 let mut s = String::with_capacity(l_s.len() + r_s.len());
6086 s.push_str(&l_s);
6087 s.push_str(&r_s);
6088 let mut out = Vec::with_capacity(ra.len() + 1);
6089 out.push(fusevm::Value::str(s));
6090 out.extend(ra);
6091 fusevm::Value::Array(out)
6092 }
6093 (lhs_s, rhs_s) => {
6094 let l = lhs_s.as_str_cow();
6095 let r = rhs_s.as_str_cow();
6096 let mut s = String::with_capacity(l.len() + r.len());
6097 s.push_str(&l);
6098 s.push_str(&r);
6099 fusevm::Value::str(s)
6100 }
6101 }
6102 });
6103
6104 // BUILTIN_CONCAT_DISTRIBUTE — word-segment concat. With
6105 // rcexpandparam (zsh option), distributes element-wise (cartesian
6106 // product). Default mode: joins arrays with IFS first char to a
6107 // single scalar before concat, matching zsh's default unquoted
6108 // and DQ semantics. Direct port of Src/subst.c sepjoin path
6109 // (line ~1813) which gates element-vs-join on the rc_expand_param
6110 // option, defaulting to join.
6111 // BUILTIN_CONCAT_DISTRIBUTE_FORCED — same shape as
6112 // CONCAT_DISTRIBUTE, but always cartesian-distributes when one
6113 // side is Array. Used for compile-time-detected explicit
6114 // distribution forms (`${^arr}` etc.) where the source flag
6115 // overrides the rcexpandparam option default.
6116 vm.register_builtin(BUILTIN_CONCAT_DISTRIBUTE_FORCED, |vm, _argc| {
6117 let rhs = vm.pop();
6118 let lhs = vm.pop();
6119 match (lhs, rhs) {
6120 (fusevm::Value::Array(la), fusevm::Value::Array(ra)) => {
6121 if ra.is_empty() {
6122 return fusevm::Value::Array(la);
6123 }
6124 if la.is_empty() {
6125 return fusevm::Value::Array(ra);
6126 }
6127 let mut out = Vec::with_capacity(la.len() * ra.len());
6128 for a in &la {
6129 let a_s = a.as_str_cow();
6130 for b in &ra {
6131 let b_s = b.as_str_cow();
6132 let mut s = String::with_capacity(a_s.len() + b_s.len());
6133 s.push_str(&a_s);
6134 s.push_str(&b_s);
6135 out.push(fusevm::Value::str(s));
6136 }
6137 }
6138 fusevm::Value::Array(out)
6139 }
6140 (fusevm::Value::Array(la), rhs_scalar) => {
6141 let r = rhs_scalar.as_str_cow();
6142 let out: Vec<fusevm::Value> = la
6143 .into_iter()
6144 .map(|a| {
6145 let a_s = a.as_str_cow();
6146 let mut s = String::with_capacity(a_s.len() + r.len());
6147 s.push_str(&a_s);
6148 s.push_str(&r);
6149 fusevm::Value::str(s)
6150 })
6151 .collect();
6152 fusevm::Value::Array(out)
6153 }
6154 (lhs_scalar, fusevm::Value::Array(ra)) => {
6155 let l = lhs_scalar.as_str_cow();
6156 let out: Vec<fusevm::Value> = ra
6157 .into_iter()
6158 .map(|b| {
6159 let b_s = b.as_str_cow();
6160 let mut s = String::with_capacity(l.len() + b_s.len());
6161 s.push_str(&l);
6162 s.push_str(&b_s);
6163 fusevm::Value::str(s)
6164 })
6165 .collect();
6166 fusevm::Value::Array(out)
6167 }
6168 (lhs_s, rhs_s) => {
6169 let l = lhs_s.as_str_cow();
6170 let r = rhs_s.as_str_cow();
6171 let mut s = String::with_capacity(l.len() + r.len());
6172 s.push_str(&l);
6173 s.push_str(&r);
6174 fusevm::Value::str(s)
6175 }
6176 }
6177 });
6178
6179 vm.register_builtin(BUILTIN_CONCAT_DISTRIBUTE, |vm, _argc| {
6180 let rhs = vm.pop();
6181 let lhs = vm.pop();
6182 let rc_expand = with_executor(|exec| {
6183 crate::ported::options::opt_state_get("rcexpandparam").unwrap_or(false)
6184 });
6185 let ifs_first = || -> String {
6186 with_executor(|exec| {
6187 exec.get_variable("IFS")
6188 .chars()
6189 .next()
6190 .map(|c| c.to_string())
6191 .unwrap_or_else(|| " ".to_string())
6192 })
6193 };
6194 // Helper: join an Array to scalar via IFS-first.
6195 let join_arr = |arr: Vec<fusevm::Value>| -> String {
6196 let sep = ifs_first();
6197 arr.iter()
6198 .map(|v| v.as_str_cow().into_owned())
6199 .collect::<Vec<_>>()
6200 .join(&sep)
6201 };
6202 if !rc_expand {
6203 // Default: join any Array side to scalar, then concat.
6204 let l = match lhs {
6205 fusevm::Value::Array(a) => join_arr(a),
6206 other => other.as_str_cow().into_owned(),
6207 };
6208 let r = match rhs {
6209 fusevm::Value::Array(a) => join_arr(a),
6210 other => other.as_str_cow().into_owned(),
6211 };
6212 let mut s = String::with_capacity(l.len() + r.len());
6213 s.push_str(&l);
6214 s.push_str(&r);
6215 return fusevm::Value::str(s);
6216 }
6217 match (lhs, rhs) {
6218 (fusevm::Value::Array(la), fusevm::Value::Array(ra)) => {
6219 // Cartesian product: [a + b for a in la for b in ra].
6220 let mut out = Vec::with_capacity(la.len() * ra.len().max(1));
6221 if ra.is_empty() {
6222 return fusevm::Value::Array(la);
6223 }
6224 if la.is_empty() {
6225 return fusevm::Value::Array(ra);
6226 }
6227 for a in &la {
6228 let a_s = a.as_str_cow();
6229 for b in &ra {
6230 let b_s = b.as_str_cow();
6231 let mut s = String::with_capacity(a_s.len() + b_s.len());
6232 s.push_str(&a_s);
6233 s.push_str(&b_s);
6234 out.push(fusevm::Value::str(s));
6235 }
6236 }
6237 fusevm::Value::Array(out)
6238 }
6239 (fusevm::Value::Array(la), rhs_scalar) => {
6240 let r = rhs_scalar.as_str_cow();
6241 let out: Vec<fusevm::Value> = la
6242 .into_iter()
6243 .map(|a| {
6244 let a_s = a.as_str_cow();
6245 let mut s = String::with_capacity(a_s.len() + r.len());
6246 s.push_str(&a_s);
6247 s.push_str(&r);
6248 fusevm::Value::str(s)
6249 })
6250 .collect();
6251 fusevm::Value::Array(out)
6252 }
6253 (lhs_scalar, fusevm::Value::Array(ra)) => {
6254 let l = lhs_scalar.as_str_cow();
6255 let out: Vec<fusevm::Value> = ra
6256 .into_iter()
6257 .map(|b| {
6258 let b_s = b.as_str_cow();
6259 let mut s = String::with_capacity(l.len() + b_s.len());
6260 s.push_str(&l);
6261 s.push_str(&b_s);
6262 fusevm::Value::str(s)
6263 })
6264 .collect();
6265 fusevm::Value::Array(out)
6266 }
6267 (lhs_s, rhs_s) => {
6268 // Fast path: both scalar → identical to Op::Concat.
6269 let l = lhs_s.as_str_cow();
6270 let r = rhs_s.as_str_cow();
6271 let mut s = String::with_capacity(l.len() + r.len());
6272 s.push_str(&l);
6273 s.push_str(&r);
6274 fusevm::Value::str(s)
6275 }
6276 }
6277 });
6278
6279 // `[[ a -ef b ]]` — same-inode test. Resolves both paths via fs::metadata
6280 // (follows symlinks the way zsh's -ef does) and compares (dev, inode).
6281 // Returns false on any I/O error (path missing, permission denied, etc.).
6282 vm.register_builtin(BUILTIN_SAME_FILE, |vm, _argc| {
6283 let b = vm.pop().to_str();
6284 let a = vm.pop().to_str();
6285 let same = match (std::fs::metadata(&a), std::fs::metadata(&b)) {
6286 (Ok(ma), Ok(mb)) => ma.dev() == mb.dev() && ma.ino() == mb.ino(),
6287 _ => false,
6288 };
6289 fusevm::Value::Bool(same)
6290 });
6291
6292 // `[[ -c path ]]` — character device.
6293 vm.register_builtin(BUILTIN_IS_CHARDEV, |vm, _argc| {
6294 let path = vm.pop().to_str();
6295 let result = std::fs::metadata(&path)
6296 .map(|m| m.file_type().is_char_device())
6297 .unwrap_or(false);
6298 fusevm::Value::Bool(result)
6299 });
6300 // `[[ -b path ]]` — block device.
6301 vm.register_builtin(BUILTIN_IS_BLOCKDEV, |vm, _argc| {
6302 let path = vm.pop().to_str();
6303 let result = std::fs::metadata(&path)
6304 .map(|m| m.file_type().is_block_device())
6305 .unwrap_or(false);
6306 fusevm::Value::Bool(result)
6307 });
6308 // `[[ -p path ]]` — FIFO (named pipe).
6309 vm.register_builtin(BUILTIN_IS_FIFO, |vm, _argc| {
6310 let path = vm.pop().to_str();
6311 let result = std::fs::metadata(&path)
6312 .map(|m| m.file_type().is_fifo())
6313 .unwrap_or(false);
6314 fusevm::Value::Bool(result)
6315 });
6316 // `[[ -S path ]]` — socket.
6317 vm.register_builtin(BUILTIN_IS_SOCKET, |vm, _argc| {
6318 let path = vm.pop().to_str();
6319 let result = std::fs::symlink_metadata(&path)
6320 .map(|m| m.file_type().is_socket())
6321 .unwrap_or(false);
6322 fusevm::Value::Bool(result)
6323 });
6324
6325 // `[[ -k path ]]` / `-u` / `-g` — sticky / setuid / setgid bit.
6326 vm.register_builtin(BUILTIN_HAS_STICKY, |vm, _argc| {
6327 let path = vm.pop().to_str();
6328 let result = std::fs::metadata(&path)
6329 .map(|m| m.permissions().mode() & libc::S_ISVTX as u32 != 0)
6330 .unwrap_or(false);
6331 fusevm::Value::Bool(result)
6332 });
6333 vm.register_builtin(BUILTIN_HAS_SETUID, |vm, _argc| {
6334 let path = vm.pop().to_str();
6335 let result = std::fs::metadata(&path)
6336 .map(|m| m.permissions().mode() & libc::S_ISUID as u32 != 0)
6337 .unwrap_or(false);
6338 fusevm::Value::Bool(result)
6339 });
6340 vm.register_builtin(BUILTIN_HAS_SETGID, |vm, _argc| {
6341 let path = vm.pop().to_str();
6342 let result = std::fs::metadata(&path)
6343 .map(|m| m.permissions().mode() & libc::S_ISGID as u32 != 0)
6344 .unwrap_or(false);
6345 fusevm::Value::Bool(result)
6346 });
6347 vm.register_builtin(BUILTIN_OWNED_BY_USER, |vm, _argc| {
6348 let path = vm.pop().to_str();
6349 let euid = unsafe { libc::geteuid() };
6350 let result = std::fs::metadata(&path)
6351 .map(|m| m.uid() == euid)
6352 .unwrap_or(false);
6353 fusevm::Value::Bool(result)
6354 });
6355 vm.register_builtin(BUILTIN_OWNED_BY_GROUP, |vm, _argc| {
6356 let path = vm.pop().to_str();
6357 let egid = unsafe { libc::getegid() };
6358 let result = std::fs::metadata(&path)
6359 .map(|m| m.gid() == egid)
6360 .unwrap_or(false);
6361 fusevm::Value::Bool(result)
6362 });
6363
6364 // `[[ -N path ]]` — file's access time is NOT newer than its
6365 // modification time (zsh man: "true if file exists and its
6366 // access time is not newer than its modification time"). Used
6367 // by zsh's mailbox-watching code. The semantic is `atime <=
6368 // mtime` (equivalent to `mtime >= atime`) — equal counts as
6369 // true, which a strict `mtime > atime` check missed for newly
6370 // created files where both stamps are identical.
6371 vm.register_builtin(BUILTIN_FILE_MODIFIED_SINCE_ACCESS, |vm, _argc| {
6372 let path = vm.pop().to_str();
6373 let result = std::fs::metadata(&path)
6374 .map(|m| m.atime() <= m.mtime())
6375 .unwrap_or(false);
6376 fusevm::Value::Bool(result)
6377 });
6378
6379 // `[[ a -nt b ]]` — true if `a`'s mtime is strictly later than `b`'s.
6380 // BOTH files must exist; if either is missing the result is false.
6381 // (Earlier behavior was bash's "missing == infinitely-old"; zsh
6382 // strictly requires both files to exist.)
6383 vm.register_builtin(BUILTIN_FILE_NEWER, |vm, _argc| {
6384 let b = vm.pop().to_str();
6385 let a = vm.pop().to_str();
6386 // Use SystemTime modified() for nanosecond precision —
6387 // MetadataExt::mtime() returns seconds only, so two files
6388 // touched within the same second compared equal even when
6389 // 500ms apart. zsh tracks ns and uses `>=` for ties (touching
6390 // a then b in quick succession should still report b newer).
6391 let ta = std::fs::metadata(&a).and_then(|m| m.modified()).ok();
6392 let tb = std::fs::metadata(&b).and_then(|m| m.modified()).ok();
6393 let result = match (ta, tb) {
6394 (Some(ta), Some(tb)) => ta > tb,
6395 _ => false,
6396 };
6397 fusevm::Value::Bool(result)
6398 });
6399
6400 // `[[ a -ot b ]]` — mirror of -nt. Same both-must-exist contract.
6401 vm.register_builtin(BUILTIN_FILE_OLDER, |vm, _argc| {
6402 let b = vm.pop().to_str();
6403 let a = vm.pop().to_str();
6404 let ta = std::fs::metadata(&a).and_then(|m| m.modified()).ok();
6405 let tb = std::fs::metadata(&b).and_then(|m| m.modified()).ok();
6406 let result = match (ta, tb) {
6407 (Some(ta), Some(tb)) => ta < tb,
6408 _ => false,
6409 };
6410 fusevm::Value::Bool(result)
6411 });
6412
// NOTE: the next paragraph documents BUILTIN_ERREXIT_CHECK, which is
// registered further below, after the XTRACE_* handlers.
// `set -e` / `setopt errexit` post-command check. Compiler emits
// this after each top-level command's SetStatus (skipped inside
// conditionals/pipelines/&&||/`!`). If errexit is on AND the last
// command exited non-zero AND it's not a `return` from a function,
// exit the shell with that status.
//
6418 // `set -x` / `setopt xtrace` — print each command before it runs.
6419 // The compiler emits this BEFORE the actual builtin/external call
6420 // with the command's literal text as a single string arg. We
6421 // print to stderr if xtrace is on. Honors `$PS4` (default `+ `).
6422 //
6423 // ── XTRACE flow control ────────────────────────────────────────
6424 // Mirror of C zsh's `doneps4` flag in execcmd_exec (Src/exec.c).
6425 // When an assignment trace fires (XTRACE_ASSIGN), it emits PS4
6426 // and sets this flag so the subsequent XTRACE_ARGS skips its own
6427 // PS4 emission — the assignment + command end up on the SAME
6428 // line: `<PS4>a=1 echo hello\n`. XTRACE_ARGS / XTRACE_NEWLINE
6429 // reset the flag after emitting the trailing `\n`.
6430 vm.register_builtin(BUILTIN_XTRACE_LINE, |vm, _argc| {
6431 let cmd_text = vm.pop().to_str();
6432 // Sync exec.last_status with the live vm.last_status BEFORE
6433 // the next command runs. Direct port of the zsh exec.c
6434 // contract — `$?` reads the exit status of the *most recent*
6435 // command. XTRACE_LINE is emitted by the compiler BEFORE
6436 // every simple command, so it's the natural sync point.
6437 let live = vm.last_status;
6438 with_executor(|exec| {
6439 exec.set_last_status(live);
6440 });
6441 // C zsh emits xtrace for `(( … ))` / `[[ … ]]` / `case` /
6442 // `if/while/until/for/repeat` head expressions via
6443 // `printprompt4(); fprintf(xtrerr, "%s\n", expr)` at
6444 // Src/exec.c:5240 (math), c:5286 (cond), c:4117 (for), etc.
6445 // The compiler emits BUILTIN_XTRACE_LINE only at those
6446 // construct boundaries (compile_arith / compile_cond /
6447 // compile_if / compile_while / compile_for / compile_case);
6448 // simple commands route to BUILTIN_XTRACE_ARGS instead. So
6449 // this handler always emits when xtrace is on — no prefix-
6450 // string heuristic.
6451 let on = with_executor(|exec|
6452 crate::ported::options::opt_state_get("xtrace").unwrap_or(false));
6453 if on {
6454 let already = XTRACE_DONE_PS4.with(|f| f.get());
6455 if !already {
6456 printprompt4();
6457 }
6458 eprintln!("{}", cmd_text);
6459 XTRACE_DONE_PS4.with(|f| f.set(false));
6460 }
6461 fusevm::Value::Status(0)
6462 });
6463
6464 // Like XTRACE_LINE but reads the top `argc - 1` values from the
6465 // VM stack WITHOUT consuming them (peek), then pops a prefix
6466 // string at the top. Joins prefix + peeked args with spaces using
6467 // zsh's quotedzputs-equivalent quoting. Direct port of
6468 // Src/exec.c:2055-2066 — emit AFTER expansion, with each arg
6469 // shell-quoted, so `for i in a b; echo for $i` traces as
6470 // `echo for a` / `echo for b`, not `echo for $i`.
6471 //
6472 // Stack contract on entry: [arg1, arg2, ..., argN, prefix].
6473 // Pops prefix; peeks argN..arg1 below. argc = N + 1.
6474 vm.register_builtin(BUILTIN_XTRACE_ARGS, |vm, argc| {
6475 let prefix = vm.pop().to_str();
6476 let live = vm.last_status;
6477 with_executor(|exec| {
6478 exec.set_last_status(live);
6479 });
6480 let on = with_executor(|exec| crate::ported::options::opt_state_get("xtrace").unwrap_or(false));
6481 if on {
6482 let n_args = argc.saturating_sub(1) as usize;
6483 let len = vm.stack.len();
6484 let arg_strs: Vec<String> = if n_args > 0 && len >= n_args {
6485 vm.stack[len - n_args..]
6486 .iter()
6487 .map(|v| quotedzputs(&v.to_str()))
6488 .collect()
6489 } else {
6490 Vec::new()
6491 };
6492 // Builtins dispatch through `execbuiltin` (Src/builtin.c:442)
6493 // which emits its own PS4 + name + args xtrace. To avoid
6494 // double-emission, skip our emission here when the first
6495 // arg is a known builtin with a registered HandlerFunc —
6496 // those go through execbuiltin and will trace themselves.
6497 //
6498 // Externals + builtins without HandlerFunc (still pending
6499 // canonical port) keep our emission as a stand-in until
6500 // they migrate over.
6501 // The `prefix` IS the command name (first whitespace-token
6502 // of the original cmd text). If a BUILTIN entry with a
6503 // HandlerFunc matches, execbuiltin will emit xtrace there.
6504 let goes_through_execbuiltin = crate::ported::builtin::BUILTINS
6505 .iter()
6506 .any(|b| b.node.nam == prefix && b.handlerfunc.is_some());
6507 if !goes_through_execbuiltin {
6508 let line = if arg_strs.is_empty() {
6509 prefix
6510 } else {
6511 format!("{} {}", prefix, arg_strs.join(" "))
6512 };
6513 // Mirrors Src/exec.c:2055 xtrace emission. C does:
6514 // if (!doneps4) printprompt4();
6515 // ... emit args + spaces ...
6516 // fputc('\n', xtrerr); fflush(xtrerr);
6517 // We honor doneps4 via XTRACE_DONE_PS4 — if a prior
6518 // XTRACE_ASSIGN this line already emitted PS4, skip
6519 // it. Then reset the flag after the trailing newline
6520 // so the next command starts fresh.
6521 let already_ps4 = XTRACE_DONE_PS4.with(|f| f.get());
6522 if !already_ps4 {
6523 printprompt4();
6524 }
6525 eprintln!("{}", line);
6526 }
6527 XTRACE_DONE_PS4.with(|f| f.set(false));
6528 }
6529 fusevm::Value::Status(0)
6530 });
6531
6532 // BUILTIN_XTRACE_ASSIGN — direct port of the per-assignment
6533 // trace block at Src/exec.c:2517-2582. C body excerpt:
6534 // xtr = isset(XTRACE);
6535 // if (xtr) { printprompt4(); doneps4 = 1; }
6536 // while (assign) {
6537 // if (xtr) fprintf(xtrerr, "%s+=" or "%s=", name);
6538 // ... eval value into `val` ...
6539 // if (xtr) { quotedzputs(val, xtrerr); fputc(' ', xtrerr); }
6540 // ...
6541 // }
6542 //
6543 // Stack on entry: [..., name, value]. PEEKS both (they're left
6544 // on stack for SET_VAR to pop). Emits `name=<quoted-val> ` with
6545 // no newline; trailing `\n` comes from XTRACE_ARGS (cmd path)
6546 // or XTRACE_NEWLINE (assignment-only path).
6547 vm.register_builtin(BUILTIN_XTRACE_ASSIGN, |vm, _argc| {
6548 let on = with_executor(|exec| crate::ported::options::opt_state_get("xtrace").unwrap_or(false));
6549 if on {
6550 // PEEK [..., name, value] — argc==2 by contract.
6551 let len = vm.stack.len();
6552 if len >= 2 {
6553 let name = vm.stack[len - 2].to_str();
6554 let value = vm.stack[len - 1].to_str();
6555 let already_ps4 = XTRACE_DONE_PS4.with(|f| f.get());
6556 if !already_ps4 {
6557 printprompt4();
6558 XTRACE_DONE_PS4.with(|f| f.set(true));
6559 }
6560 // C: `fprintf(xtrerr, "%s=", name)` then `quotedzputs
6561 // (val); fputc(' ', xtrerr);`. Emit no newline.
6562 eprint!("{}={} ", name, quotedzputs(&value));
6563 }
6564 }
6565 fusevm::Value::Status(0)
6566 });
6567
6568 // BUILTIN_XTRACE_NEWLINE — emit trailing `\n` + flush iff a
6569 // prior XTRACE_ASSIGN this line already emitted PS4. Mirrors
6570 // C's `fputc('\n', xtrerr); fflush(xtrerr);` at exec.c:3398
6571 // (the assignment-only path through execcmd_exec).
6572 vm.register_builtin(BUILTIN_XTRACE_NEWLINE, |_vm, _argc| {
6573 let on = with_executor(|exec| crate::ported::options::opt_state_get("xtrace").unwrap_or(false));
6574 if on {
6575 let already_ps4 = XTRACE_DONE_PS4.with(|f| f.get());
6576 if already_ps4 {
6577 eprintln!();
6578 XTRACE_DONE_PS4.with(|f| f.set(false));
6579 }
6580 }
6581 fusevm::Value::Status(0)
6582 });
6583
6584 vm.register_builtin(BUILTIN_ERREXIT_CHECK, |vm, _argc| {
6585 let last = vm.last_status;
6586 if last == 0 {
6587 return fusevm::Value::Status(0);
6588 }
6589 // ZERR / ERR trap fires whenever a command exits non-zero
6590 // (zsh signals.c handle_signals path). Read the trap body
6591 // BEFORE the errexit check so a trap on the failing
6592 // command's last command can run before we exit.
6593 let zerr_body = with_executor(|exec| {
6594 exec.traps
6595 .get("ZERR")
6596 .cloned()
6597 .or_else(|| exec.traps.get("ERR").cloned())
6598 });
6599 if let Some(body) = zerr_body {
6600 // Run the trap. Don't recurse on the trap's own failure
6601 // (clear last_status during the run).
6602 with_executor(|exec| {
6603 let saved = exec.last_status();
6604 exec.set_last_status(0);
6605 let _ = exec.execute_script(&body);
6606 exec.set_last_status(saved);
6607 });
6608 }
6609 let should_exit = with_executor(|exec| {
6610 // zsh stores the option as `errexit` (default OFF). Honor
6611 // both keys (`errexit=true` from `setopt errexit` /
6612 // `set -o errexit`, and `set -e` which currently writes
6613 // `errexit=true` too). Also suppress when inside a function
6614 // call — zsh's errexit lets functions handle their own
6615 // failures unless ERR_RETURN is also set. Also suppress
6616 // when inside a subshell — the in-process snapshot/restore
6617 // doesn't have a process-isolation boundary, so a real
6618 // `process::exit` would tear down the parent shell. Match
6619 // zsh's "errexit aborts the subshell only" by leaving the
6620 // parent alive (subshell continues until natural end).
6621 // errexit lives in two stores. `set -e` / `setopt errexit`
6622 // write through bin_setopt → OPTS_LIVE (canonical
6623 // `opts[ERREXIT]` per Src/options.c:46). Older paths still
6624 // populate `exec.options`. Check both — agree when EITHER
6625 // says on.
6626 let on_canonical = crate::ported::zsh_h::isset(
6627 crate::ported::zsh_h::ERREXIT);
6628 let on_legacy = crate::ported::options::opt_state_get("errexit").unwrap_or(false);
6629 (on_canonical || on_legacy)
6630 && exec.local_scope_depth == 0
6631 && exec.subshell_snapshots.is_empty()
6632 });
6633 if should_exit {
6634 std::process::exit(last);
6635 }
6636 fusevm::Value::Status(last)
6637 });
6638
6639 // `${var:-default}` / `${var:=default}` / `${var:?error}` / `${var:+alt}`
6640 // Pops [name, op_byte, rhs] (rhs popped first). Returns the modified
6641 // value as Value::Str. Handles unset/empty distinction (`:-` etc.
6642 // treat empty same as unset, matching POSIX).
6643 vm.register_builtin(BUILTIN_PARAM_DEFAULT_FAMILY, |vm, _argc| {
6644 let rhs = vm.pop().to_str();
6645 let op = vm.pop().to_int() as u8;
6646 let name = vm.pop().to_str();
6647 // Op codes:
6648 // 0 :- 1 := 2 :? 3 :+ (treat-empty-as-unset variants)
6649 // 4 - 5 = 6 ? 7 + (no-colon: only fire if truly unset)
6650 // The default/alt modifiers handle missing-var themselves, so
6651 // suppress the nounset (set -u) abort during the value lookup —
6652 // otherwise `${unset:-fb}` exits the shell instead of returning
6653 // "fb". Save/restore nounset around the lookup.
6654 let val = with_executor(|exec| {
6655 let saved_nounset = crate::ported::options::opt_state_get("nounset");
6656 let saved_unset = crate::ported::options::opt_state_get("unset");
6657 crate::ported::options::opt_state_set("nounset", false);
6658 crate::ported::options::opt_state_set("unset", true);
6659 let v = exec.get_variable(&name);
6660 match saved_nounset {
6661 Some(b) => {
6662 crate::ported::options::opt_state_set("nounset", b);
6663 }
6664 None => {
6665 crate::ported::options::opt_state_unset("nounset");
6666 }
6667 }
6668 match saved_unset {
6669 Some(b) => {
6670 crate::ported::options::opt_state_set("unset", b);
6671 }
6672 None => {
6673 crate::ported::options::opt_state_unset("unset");
6674 }
6675 }
6676 v
6677 });
6678 let is_set = with_executor(|exec| {
6679 // Positional params ($1, $2, ...): set iff index <= $#.
6680 if name.chars().all(|c| c.is_ascii_digit()) && !name.is_empty() {
6681 if let Ok(idx) = name.parse::<usize>() {
6682 if idx == 0 {
6683 return true; // $0 always set
6684 }
6685 return idx <= exec.pparams().len();
6686 }
6687 }
6688 // zsh-special "always set" params: their getter computes
6689 // a dynamic value, but the contains_key check fails. Treat
6690 // them as set so `${SECONDS-default}` returns the seconds,
6691 // not "default".
6692 let is_zsh_special = matches!(
6693 name.as_str(),
6694 "SECONDS"
6695 | "EPOCHSECONDS"
6696 | "EPOCHREALTIME"
6697 | "RANDOM"
6698 | "LINENO"
6699 | "HISTCMD"
6700 | "PPID"
6701 | "UID"
6702 | "EUID"
6703 | "GID"
6704 | "EGID"
6705 | "SHLVL"
6706 );
6707 exec.has_scalar(&name)
6708 || exec.array(&name).is_some()
6709 || exec.assoc(&name).is_some()
6710 || std::env::var(&name).is_ok()
6711 || is_zsh_special
6712 });
6713 let is_empty = val.is_empty();
6714 // For colon variants, "missing" = unset OR empty.
6715 // For no-colon variants, "missing" = unset only.
6716 let missing = match op {
6717 0..=3 => is_empty,
6718 _ => !is_set,
6719 };
6720 // Empty-unquoted-elide for default-family results. When the
6721 // resulting expansion is empty AND we're unquoted, drop the
6722 // arg. Direct port of zsh's elide-empty-words pass which
6723 // applies to ALL paramsubst results, including default-family.
6724 let in_dq = with_executor(|exec| exec.in_dq_context > 0);
6725 let maybe_elide = |s: String| -> fusevm::Value {
6726 if s.is_empty() && !in_dq {
6727 fusevm::Value::Array(Vec::new())
6728 } else {
6729 fusevm::Value::str(s)
6730 }
6731 };
6732 // The default/alt operand may contain `$var` / `$(cmd)` /
6733 // `$((expr))` — zsh expands these before substitution. Apply
6734 // expand_string lazily (only when we'll actually use rhs).
6735 let expand_rhs = |s: &str| -> String { crate::ported::subst::singsub(s) };
6736 match op {
6737 0 | 4 => {
6738 // `:-` / `-` use default if missing
6739 if missing {
6740 maybe_elide(expand_rhs(&rhs))
6741 } else {
6742 maybe_elide(val)
6743 }
6744 }
6745 1 | 5 => {
6746 // `:=` / `=` assign default if missing, then use it
6747 if missing {
6748 let expanded = expand_rhs(&rhs);
6749 with_executor(|exec| {
6750 exec.set_scalar(name, expanded.clone());
6751 });
6752 maybe_elide(expanded)
6753 } else {
6754 maybe_elide(val)
6755 }
6756 }
6757 2 | 6 => {
6758 // `:?` / `?` error if missing — zsh in -c mode prints
6759 // `zsh:LINE: NAME: msg` and exits 1. Mirror that: emit
6760 // diagnostic on stderr and abort the shell.
6761 if missing {
6762 let expanded = expand_rhs(&rhs);
6763 let msg = if expanded.is_empty() {
6764 "parameter not set".to_string()
6765 } else {
6766 expanded
6767 };
6768 eprintln!("zshrs:1: {}: {}", name, msg);
6769 std::process::exit(1);
6770 } else {
6771 fusevm::Value::str(val)
6772 }
6773 }
6774 3 | 7 => {
6775 // `:+` / `+` use alt if NOT missing (set-and-non-empty
6776 // for colon variant; just set for no-colon variant).
6777 if missing {
6778 maybe_elide(String::new())
6779 } else {
6780 maybe_elide(expand_rhs(&rhs))
6781 }
6782 }
6783 8 => {
6784 // `${+name}` set-test — emits "1" if name is set,
6785 // "0" if unset. Direct port of subst.c case '+' at
6786 // the leading-flag position (different from `${name+rhs}`).
6787 // is_set was computed above and includes positional
6788 // params, zsh-special vars, regular vars, arrays,
6789 // assocs. Subscripted form `${+arr[i]}` checks if
6790 // that specific element is set — get_variable doesn't
6791 // parse subscripts, so resolve the lookup by hand:
6792 // numeric N → arr[N-1] is set iff N <= len; (r)PAT /
6793 // (R)PAT / KEY → resolve via the same subscript
6794 // engine as plain `${arr[i]}`.
6795 if let Some(lb) = name.find('[') {
6796 if name.ends_with(']') {
6797 let arr_name = &name[..lb];
6798 let key = &name[lb + 1..name.len() - 1];
6799 let direct_set = with_executor(|exec| {
6800 // Numeric index: 1-based, must be in range.
6801 if let Ok(n) = key.parse::<i64>() {
6802 let len = exec
6803 .array(arr_name)
6804 .map(|a| a.len() as i64)
6805 .unwrap_or(0);
6806 if n > 0 && n <= len {
6807 return Some(true);
6808 }
6809 if n < 0 {
6810 let resolved = len + n;
6811 return Some(resolved >= 0);
6812 }
6813 return Some(false);
6814 }
6815 if let Some(map) = exec.assoc(arr_name) {
6816 return Some(map.contains_key(key));
6817 }
6818 if let Some(arr) = exec.array(arr_name) {
6819 let pat = if let Some(p) = key
6820 .strip_prefix("(r)")
6821 .or_else(|| key.strip_prefix("(R)"))
6822 {
6823 p
6824 } else {
6825 key
6826 };
6827 return Some(arr.iter().any(|el| {
6828 crate::exec::glob_match_static(el, pat)
6829 }));
6830 }
6831 None
6832 });
6833 // Magic-assoc fallback (commands, aliases,
6834 // functions, options, etc.) — `${+commands[ls]}`
6835 // walks PATH to answer "is ls a command". Direct
6836 // port of zsh's getindex routing through the
6837 // special-parameter getfn (Src/params.c
6838 // SPECIAL_PARAMS) when the named assoc isn't
6839 // user-declared. Re-uses the same magic_assoc_lookup
6840 // dispatcher BUILTIN_ARRAY_INDEX consults; called
6841 // outside the with_executor closure so the lookup
6842 // itself can re-enter the executor lock safely.
6843 let element_set = direct_set.unwrap_or_else(|| {
6844 magic_assoc_lookup(arr_name, key)
6845 .map(|v| !v.to_str().is_empty())
6846 .unwrap_or(false)
6847 });
6848 return fusevm::Value::str(if element_set { "1" } else { "0" });
6849 }
6850 fusevm::Value::str(if !val.is_empty() { "1" } else { "0" })
6851 } else {
6852 fusevm::Value::str(if is_set { "1" } else { "0" })
6853 }
6854 }
6855 _ => fusevm::Value::str(val),
6856 }
6857 });
6858
6859 // `${var:offset[:length]}` — substring. Pops [name, offset, length].
6860 // length == -1 means "rest of string". Negative offset counts from end.
6861 vm.register_builtin(BUILTIN_PARAM_SUBSTRING, |vm, _argc| {
6862 let length = vm.pop().to_int();
6863 let offset = vm.pop().to_int();
6864 let name = vm.pop().to_str();
6865 // `${@:offset:length}` / `${*:offset:length}` — slice
6866 // positional parameters as ARRAY elements (not chars). zsh's
6867 // semantics: 1-based, inclusive offset; length counts elems.
6868 // For arrays/assoc-values arrays, same array semantics.
6869 // `[@]`/`[*]` suffix preserved by the compile path indicates
6870 // the user wrote `${arr[@]:n}` and expects splice; return
6871 // Value::Array so downstream array-init keeps element
6872 // boundaries.
6873 let (lookup_name, force_array) = if let Some(stripped) = name
6874 .strip_suffix("[@]")
6875 .or_else(|| name.strip_suffix("[*]"))
6876 {
6877 (stripped.to_string(), true)
6878 } else {
6879 (name.clone(), false)
6880 };
6881 if lookup_name == "@" || lookup_name == "*" {
6882 let result = with_executor(|exec| slice_positionals(exec, offset, length));
6883 return fusevm::Value::Array(result.into_iter().map(fusevm::Value::str).collect());
6884 }
6885 let array_slice = with_executor(|exec| exec.array(&lookup_name));
6886 if let Some(arr) = array_slice {
6887 let result = slice_array_zero_based(&arr, offset, length);
6888 return if force_array {
6889 fusevm::Value::Array(result.into_iter().map(fusevm::Value::str).collect())
6890 } else {
6891 fusevm::Value::str(result.join(" "))
6892 };
6893 }
6894 let name = lookup_name;
6895 let val = with_executor(|exec| exec.get_variable(&name));
6896 let chars: Vec<char> = val.chars().collect();
6897 let len = chars.len() as i64;
6898 let start = if offset < 0 {
6899 (len + offset).max(0) as usize
6900 } else {
6901 (offset as usize).min(chars.len())
6902 };
6903 // length sentinels:
6904 // i64::MIN → no length given, take rest of string
6905 // negative → "stop N chars before end" (bash/zsh)
6906 // positive → take exactly N chars
6907 let take = if length == i64::MIN {
6908 chars.len().saturating_sub(start)
6909 } else if length < 0 {
6910 // Stop |length| chars before end.
6911 let end = (len + length).max(start as i64) as usize;
6912 end.saturating_sub(start)
6913 } else {
6914 (length as usize).min(chars.len().saturating_sub(start))
6915 };
6916 let result: String = chars.iter().skip(start).take(take).collect();
6917 fusevm::Value::str(result)
6918 });
6919
6920 // `${var:offset[:length]}` with arith/var-based offset/length —
6921 // the literal-int variant above can't represent `${s:$n:2}`.
6922 // Stack layout (top→bottom): has_length, length_expr, offset_expr,
6923 // name. has_length distinguishes "no length given" from
6924 // "length=0".
6925 vm.register_builtin(BUILTIN_PARAM_SUBSTRING_EXPR, |vm, _argc| {
6926 let has_len = vm.pop().to_int() != 0;
6927 let len_expr = vm.pop().to_str();
6928 let off_expr = vm.pop().to_str();
6929 let name = vm.pop().to_str();
6930 // Match BUILTIN_PARAM_SUBSTRING's array-aware dispatch:
6931 // `${@:n:m}` / `${arr[@]:n:m}` slice positionals/array
6932 // ELEMENTS, not chars. Without this, the expr-form fell
6933 // back to scalar char-slicing on the IFS-joined value.
6934 let (lookup_name, force_array) = if let Some(stripped) = name
6935 .strip_suffix("[@]")
6936 .or_else(|| name.strip_suffix("[*]"))
6937 {
6938 (stripped.to_string(), true)
6939 } else {
6940 (name.clone(), false)
6941 };
6942 // Use a dual-result: Array when force_array, Str otherwise.
6943 // zsh: `${a[@]:1}` keeps array splice for downstream array
6944 // assignment (`b=("${a[@]:1}")` should give 2 elements, not
6945 // a single space-joined string).
6946 enum Result {
6947 Str(String),
6948 Arr(Vec<String>),
6949 }
6950 let result = with_executor(|exec| {
6951 let offset = crate::ported::math::mathevali(&crate::ported::subst::singsub(&off_expr)).unwrap_or(0);
6952 let length_opt: Option<i64> = if has_len {
6953 Some(crate::ported::math::mathevali(&crate::ported::subst::singsub(&len_expr)).unwrap_or(0))
6954 } else {
6955 None
6956 };
6957 // Positional-param slice (`${@:1:2}`).
6958 if lookup_name == "@" || lookup_name == "*" {
6959 let parts = slice_positionals(exec, offset, length_opt.unwrap_or(i64::MIN));
6960 return Result::Arr(parts);
6961 }
6962 // Array slice (`${arr:1:2}` or `${arr[@]:1:2}`).
6963 if let Some(arr) = exec.array(&lookup_name) {
6964 let sliced = slice_array_zero_based(&arr, offset, length_opt.unwrap_or(i64::MIN));
6965 return if force_array {
6966 Result::Arr(sliced)
6967 } else {
6968 Result::Str(sliced.join(" "))
6969 };
6970 }
6971 // Scalar fallback.
6972 let val = exec.get_variable(&lookup_name);
6973 let chars: Vec<char> = val.chars().collect();
6974 let len = chars.len() as i64;
6975 let start = if offset < 0 {
6976 (len + offset).max(0) as usize
6977 } else {
6978 (offset as usize).min(chars.len())
6979 };
6980 let take = match length_opt {
6981 None => chars.len().saturating_sub(start),
6982 Some(length) if length < 0 => chars.len().saturating_sub(start),
6983 Some(length) => (length as usize).min(chars.len().saturating_sub(start)),
6984 };
6985 Result::Str(chars.iter().skip(start).take(take).collect::<String>())
6986 });
6987 match result {
6988 Result::Str(s) => fusevm::Value::str(s),
6989 Result::Arr(parts) => {
6990 fusevm::Value::Array(parts.into_iter().map(fusevm::Value::str).collect())
6991 }
6992 }
6993 });
6994
// `${var#pat}` / `${var##pat}` / `${var%pat}` / `${var%%pat}`
// Pops [name, pattern, op_byte, dq_flag] (dq_flag popped first).
// op: 0=`#` short-prefix, 1=`##` long, 2=`%` short-suffix, 3=`%%`
// long. Glob-pattern matching via the existing glob_match_static
// helper. Returns Value::Str for scalars and joined (double-quoted)
// arrays, Value::Array when an array is stripped per element.
vm.register_builtin(BUILTIN_PARAM_STRIP, |vm, _argc| {
    // The compiler now passes `dq_flag` as a 4th arg so the
    // runtime can distinguish DQ-wrapped (join-then-strip)
    // from unquoted (per-element) on array-valued names.
    // Mirrors zsh's pattern.c split between `getmatch` (joined
    // scalar) and `getmatcharr` (per-element).
    let dq_flag = vm.pop().to_int() != 0;
    let op = vm.pop().to_int() as u8;
    let pattern_raw = vm.pop().to_str();
    let name = vm.pop().to_str();
    // SUB_M / SUB_S flags. M = return matched portion (vs strip
    // result). S = search anywhere instead of anchored to start
    // (#/##) or end (%/%%). Direct port of subst.c:2171/2186
    // SUB_MATCH / SUB_SUBSTR bits + getmatch dispatch.
    // The flags are read once and then cleared (set back to 0), so
    // they apply to this expansion only.
    let (sub_match, sub_substr) = {
        let sf = crate::ported::subst::sub_flags_get();
        let m = (sf & 0x0008) != 0;
        let s = (sf & 0x0004) != 0;
        crate::ported::subst::sub_flags_set(0);
        (m, s)
    };
    // Pattern may contain `$var` / `$(cmd)` / `$((expr))` — zsh
    // expands these before applying the strip, so run it through
    // singsub first.
    let pattern = crate::ported::subst::singsub(&pattern_raw);
    // NOTE(review): an earlier comment here said this path always
    // passes `m_flag=false` to a shared `strip_match_op` helper. The
    // code below is an inline closure and DOES honor the (M) flag
    // via `sub_match` — confirm which description is current.
    //
    // Op codes per ParamModifierKind::Strip:
    //   0 = `#`  shortest prefix
    //   1 = `##` longest prefix
    //   2 = `%`  shortest suffix
    //   3 = `%%` longest suffix
    // Pattern matching goes through glob_match_static.
    let strip_one = |v: &str, op: u8, pattern: &str| -> String {
        // Work on chars, not bytes, so slicing never splits a
        // multi-byte character.
        let chars: Vec<char> = v.chars().collect();
        let n = chars.len();
        // (S) substring search: instead of anchoring to start
        // (#/##) or end (%/%%), find the shortest/longest match
        // ANYWHERE in v, and either return it (sub_match) or
        // remove it (default — keep parts before+after the match).
        // Direct port of subst.c:2186 SUB_SUBSTR bit which
        // getmatch routes through pat_substr_match.
        if sub_substr { // c:2186
            let longest = matches!(op, 1 | 3); // c:2186 (## / %% want longest)
            let mut best: Option<(usize, usize)> = None; // c:2186 (start, end in chars)
            // Slide a window across v; for each start index
            // try every (longest|shortest) length that matches.
            for start in 0..=n { // c:2186
                let end_iter: Box<dyn Iterator<Item = usize>> = if longest { // c:2186
                    Box::new((start..=n).rev()) // c:2186
                } else { // c:2186
                    Box::new(start..=n) // c:2186
                }; // c:2186
                for end in end_iter { // c:2186
                    let sub: String = chars[start..end].iter().collect(); // c:2186
                    if crate::exec::glob_match_static(&sub, pattern) { // c:2186
                        // (S) prefers the leftmost match
                        // for # / ##, and the rightmost for
                        // % / %%. # / ## scan left-to-right;
                        // % / %% mirror by walking start
                        // backward at the outer level — but
                        // since the outer loop is L-to-R, we
                        // record EVERY match and pick the
                        // last one for %/%%, first for #/##.
                        // NOTE(review): for %/%% the inner loop
                        // never breaks, so the LAST end tried at
                        // each start wins. With `%%` (ends walked
                        // descending) that is the shortest match at
                        // that start — confirm this is intended.
                        let suffix_op = matches!(op, 2 | 3); // c:2186
                        if best.is_none() || suffix_op { // c:2186
                            best = Some((start, end)); // c:2186
                        } // c:2186
                        if !suffix_op { break; } // c:2186 (#/## stop at first)
                    } // c:2186
                } // c:2186
                if best.is_some() && !matches!(op, 2 | 3) { break; } // c:2186
            } // c:2186
            if let Some((s, e)) = best { // c:2186
                let matched: String = chars[s..e].iter().collect(); // c:2186
                if sub_match { // c:2171
                    return matched; // c:2171
                } // c:2171
                // Default disposition: drop the matched window and
                // keep what was before and after it.
                let mut out = String::new(); // c:2186
                out.extend(chars[..s].iter()); // c:2186
                out.extend(chars[e..].iter()); // c:2186
                return out; // c:2186
            } // c:2186
            // No match anywhere: (M) yields empty, otherwise the
            // value is returned untouched.
            return if sub_match { String::new() } else { v.to_string() }; // c:2186
        } // c:2186
        // (M) inverted-disposition helper: when sub_match is set,
        // return the MATCHED portion instead of the post-strip
        // string. Used by zsh idioms like \${(M)path#*/} which
        // returns the leading "/segment" rather than the rest.
        // Direct port of getmatch's SUB_MATCH branch — it picks
        // the matched-portion view from the same scan.
        match op {
            0 => {
                // shortest prefix strip — try k = 0, 1, ...
                for k in 0..=n {
                    let prefix: String = chars[..k].iter().collect();
                    if crate::exec::glob_match_static(&prefix, pattern) {
                        return if sub_match { // c:2171
                            prefix // c:2171
                        } else { // c:2171
                            chars[k..].iter().collect()
                        };
                    }
                }
                if sub_match { String::new() } else { v.to_string() } // c:2171
            }
            1 => {
                // longest prefix strip — try k = n down to 0
                for k in (0..=n).rev() {
                    let prefix: String = chars[..k].iter().collect();
                    if crate::exec::glob_match_static(&prefix, pattern) {
                        return if sub_match { // c:2171
                            prefix // c:2171
                        } else { // c:2171
                            chars[k..].iter().collect()
                        };
                    }
                }
                if sub_match { String::new() } else { v.to_string() } // c:2171
            }
            2 => {
                // shortest suffix strip — grow the suffix from
                // length 0 upward
                for k in 0..=n {
                    let suffix: String = chars[n - k..].iter().collect();
                    if crate::exec::glob_match_static(&suffix, pattern) {
                        return if sub_match { // c:2171
                            suffix // c:2171
                        } else { // c:2171
                            chars[..n - k].iter().collect()
                        };
                    }
                }
                if sub_match { String::new() } else { v.to_string() } // c:2171
            }
            3 => {
                // longest suffix strip — shrink the suffix from
                // the whole string downward
                for k in (0..=n).rev() {
                    let suffix: String = chars[n - k..].iter().collect();
                    if crate::exec::glob_match_static(&suffix, pattern) {
                        return if sub_match { // c:2171
                            suffix // c:2171
                        } else { // c:2171
                            chars[..n - k].iter().collect()
                        };
                    }
                }
                if sub_match { String::new() } else { v.to_string() } // c:2171
            }
            // Unknown op code: leave the value untouched.
            _ => v.to_string(),
        }
    };
    // `${arr#pat}` / `${arr%pat}` / etc. on an array:
    //   - Unquoted form: iterate per element, preserve array
    //     shape so `print -l` emits one line per element. Direct
    //     port of Src/subst.c:3422-3433 `if (!vunset && isarr)`
    //     branch which calls `getmatcharr(&aval, …)` — modifies
    //     each element of the array in-place, leaves isarr=1.
    //   - DQ-wrapped form (`"${arr%pat}"`): zsh joins as scalar
    //     first then strips. So `(/tmp/foo /etc/bar)` with `%/*`
    //     gives `/tmp/foo /etc` (last `/bar` stripped from
    //     joined), not `/tmp /etc` (per-element).
    enum StripResult {
        Scalar(String),
        Array(Vec<String>),
    }
    let result: StripResult = with_executor(|exec| {
        // Quoted if either the compiler said so or the executor is
        // currently inside a double-quoted context.
        let in_dq = dq_flag || exec.in_dq_context > 0;
        // Positional parameters (`$@` / `$*`).
        if name == "@" || name == "*" {
            if in_dq {
                let joined = exec.pparams().join(" ");
                return StripResult::Scalar(strip_one(&joined, op, &pattern));
            }
            let stripped: Vec<String> = exec
                .pparams()
                .iter()
                .map(|e| strip_one(e, op, &pattern))
                .collect();
            return StripResult::Array(stripped);
        }
        // Named array.
        if let Some(arr) = exec.array(&name) {
            if in_dq {
                let joined = arr.join(" ");
                return StripResult::Scalar(strip_one(&joined, op, &pattern));
            }
            let stripped: Vec<String> = arr
                .iter()
                .map(|e| strip_one(e, op, &pattern))
                .collect();
            return StripResult::Array(stripped);
        }
        // Anything else is treated as a scalar.
        let val = exec.get_variable(&name);
        StripResult::Scalar(strip_one(&val, op, &pattern))
    });
    match result {
        StripResult::Scalar(s) => fusevm::Value::str(s),
        StripResult::Array(arr) => {
            let mapped: Vec<fusevm::Value> = arr.into_iter().map(fusevm::Value::str).collect();
            fusevm::Value::Array(mapped)
        }
    }
});
7206
7207 // `$((expr))` — pops [expr_string], evaluates via MathEval which
7208 // honors integer-vs-float distinction (zsh-compatible). Returns
7209 // the result as Value::Str so it can be Concat'd into surrounding
7210 // word context.
7211 vm.register_builtin(BUILTIN_ARITH_EVAL, |vm, _argc| {
7212 let expr = vm.pop().to_str();
7213 let result = crate::ported::subst::arithsubst(&expr, "", "");
7214 fusevm::Value::str(result)
7215 });
7216
7217 // `$(cmd)` — pops [cmd_string], routes through
7218 // run_command_substitution which performs an in-process pipe-capture.
7219 // Avoids the Op::CmdSubst sub-chunk word-emit bug
7220 // (`printf "a\nb"` produced "anb" via that path). Returns trimmed
7221 // output (trailing newlines stripped per POSIX cmd-sub semantics).
7222 vm.register_builtin(BUILTIN_CMD_SUBST_TEXT, |vm, _argc| {
7223 let cmd = vm.pop().to_str();
7224 // Inherit live $? into the inner shell so cmd-subst sees the
7225 // parent's most recent exit. Same rationale as the mode-3
7226 // backtick path above.
7227 let live_status = vm.last_status;
7228 let result = with_executor(|exec| {
7229 exec.set_last_status(live_status);
7230 exec.run_command_substitution(&cmd)
7231 });
7232 // Mirror run_command_substitution's exec.last_status side
7233 // effect into the VM's live counter so a containing
7234 // assignment's BUILTIN_SET_VAR — which reads vm.last_status
7235 // — sees the cmd-subst's exit. Without this, `a=$(false);
7236 // echo $?` reads stale 0 (vm.last_status was zeroed by
7237 // compile_assign's prelude SetStatus, and run_cmd_subst only
7238 // updated exec.last_status). Pull the value back through
7239 // exec since it owns the canonical post-subst record.
7240 let cs_status = with_executor(|exec| exec.last_status());
7241 vm.last_status = cs_status;
7242 fusevm::Value::str(result)
7243 });
7244
7245 // Text-based word expansion. Pops [preserved_text, mode_byte].
7246 // mode_byte:
7247 // 0 = Default — expand_string + xpandbraces + expand_glob
7248 // 1 = DoubleQuoted — strip outer `"…"`, expand_string only
7249 // (no brace, no glob — DQ semantics)
7250 // 2 = SingleQuoted — strip outer `'…'`, no expansion
7251 // (kept for symmetry; Snull early-return covers most SQ)
7252 // 3 = AltBackquote — strip backticks, run as cmd-sub
7253 // Single result → Value::str; multi → Value::Array.
7254 vm.register_builtin(BUILTIN_EXPAND_TEXT, |vm, _argc| {
7255 let mode = vm.pop().to_int() as u8;
7256 let text = vm.pop().to_str();
7257 // Sync vm.last_status → exec.last_status so cmd-subst (mode 3)
7258 // and any nested $? reads inside singsub see the live `$?`
7259 // from the most recent VM op. Without this, cmd-subst inside
7260 // arg-eval saw a stale exec.last_status that was zeroed at
7261 // the start of the current statement. Direct port of zsh's
7262 // pre-cmdsubst lastval propagation per Src/exec.c:4770.
7263 let live_status = vm.last_status;
7264 with_executor(|exec| match mode {
7265 // Mode 1 = DoubleQuoted (argument context).
7266 // Mode 5 = DoubleQuoted in scalar-assignment context.
7267 // Both share the same DQ unescape pre-processing; mode 5
7268 // additionally bumps `in_scalar_assign` so subst_port's
7269 // paramsubst sees ssub=true and suppresses split flags
7270 // `(f)` / `(s:STR:)` / `(0)` per Src/subst.c:1759 +
7271 // Src/exec.c::addvars line 2546 (the PREFORK_SINGLE bit
7272 // C zsh sets when prefork-ing the assignment RHS).
7273 1 | 5 => {
7274 // DoubleQuoted: strip outer `"…"` if present. In DQ
7275 // context, `\` escapes the DQ-special chars `$`, `` ` ``,
7276 // `"`, `\`. zsh's expand_string expects the lexer's
7277 // `\0X` literal-marker for an already-escaped char, so
7278 // we pre-process: `\$` → `\0$`, `\\` → `\0\`, etc. Then
7279 // expand_string handles the rest.
7280 let inner = if text.len() >= 2 && text.starts_with('"') && text.ends_with('"') {
7281 &text[1..text.len() - 1]
7282 } else {
7283 text.as_str()
7284 };
7285 let mut prepped = String::with_capacity(inner.len());
7286 let mut chars = inner.chars().peekable();
7287 while let Some(c) = chars.next() {
7288 if c == '\\' {
7289 match chars.peek() {
7290 Some('$') | Some('`') | Some('"') | Some('\\') => {
7291 prepped.push('\x00');
7292 prepped.push(chars.next().unwrap());
7293 }
7294 _ => prepped.push(c),
7295 }
7296 } else {
7297 prepped.push(c);
7298 }
7299 }
7300 // Tell parameter-flag application that we're inside
7301 // double quotes — array-only flags ((o), (O), (n),
7302 // (i), (M), (u)) must be no-ops here per zsh.
7303 exec.in_dq_context += 1;
7304 if mode == 5 {
7305 exec.in_scalar_assign += 1;
7306 }
7307 let out = crate::ported::subst::singsub(&prepped);
7308 if mode == 5 {
7309 exec.in_scalar_assign -= 1;
7310 }
7311 exec.in_dq_context -= 1;
7312 fusevm::Value::str(out)
7313 }
7314 2 => {
7315 // SingleQuoted: pure literal, strip outer `'…'`.
7316 let inner = if text.len() >= 2 && text.starts_with('\'') && text.ends_with('\'') {
7317 &text[1..text.len() - 1]
7318 } else {
7319 text.as_str()
7320 };
7321 fusevm::Value::str(inner.to_string())
7322 }
7323 3 => {
7324 // Backquote command sub: strip outer backticks.
7325 // Word-split the result on IFS when the surrounding
7326 // word is unquoted — zsh: `print -l \`echo a b c\``
7327 // emits one arg per word. The $(…) path applies the
7328 // same split via BUILTIN_WORD_SPLIT after capture; do
7329 // the equivalent here for the `…` form.
7330 let inner = if text.len() >= 2 && text.starts_with('`') && text.ends_with('`') {
7331 &text[1..text.len() - 1]
7332 } else {
7333 text.as_str()
7334 };
7335 // Apply the live VM status before running the inner
7336 // shell so the inherited $? matches zsh's lastval
7337 // propagation.
7338 exec.set_last_status(live_status);
7339 let captured = exec.run_command_substitution(inner);
7340 let trimmed = captured.trim_end_matches('\n');
7341 if exec.in_dq_context > 0 {
7342 fusevm::Value::str(trimmed.to_string())
7343 } else {
7344 let ifs = exec
7345 .scalar("IFS")
7346 .unwrap_or_else(|| " \t\n".to_string());
7347 let parts: Vec<fusevm::Value> = trimmed
7348 .split(|c: char| ifs.contains(c))
7349 .filter(|s| !s.is_empty())
7350 .map(|s| fusevm::Value::str(s.to_string()))
7351 .collect();
7352 if parts.is_empty() {
7353 fusevm::Value::str(String::new())
7354 } else if parts.len() == 1 {
7355 parts.into_iter().next().unwrap()
7356 } else {
7357 fusevm::Value::Array(parts)
7358 }
7359 }
7360 }
7361 4 => {
7362 // HeredocBody: expand variables / command-subst / arith
7363 // but NOT glob or brace. Heredoc lines like `[42]` must
7364 // pass through verbatim — running them through the
7365 // default pipeline triggers NOMATCH on the literal.
7366 fusevm::Value::str(crate::ported::subst::singsub(&text))
7367 }
7368 _ => {
7369 // Default: full expansion pipeline.
7370 // Pre-process backslash-escapes to the `\x00X` literal-
7371 // marker form so expand_string suppresses variable
7372 // expansion on escaped specials: `\$` → literal `$`,
7373 // `\\` → literal `\`, `\`` → literal `` ` ``. Without
7374 // this, `echo \$a` ran `\` literally then expanded
7375 // `$a`, leaving a stray `\` that echo's escape
7376 // interpreter then turned into form-feed when followed
7377 // by `f`-like content.
7378 let mut prepped = String::with_capacity(text.len());
7379 let mut it = text.chars().peekable();
7380 while let Some(c) = it.next() {
7381 if c == '\\' {
7382 match it.peek() {
7383 Some('$') | Some('`') | Some('"') | Some('\'') | Some('\\') => {
7384 prepped.push('\x00');
7385 prepped.push(it.next().unwrap());
7386 }
7387 // Don't preprocess `\{` / `\}` here — the
7388 // brace-expansion stage has its own
7389 // has_balanced_escaped_braces detector that
7390 // strips the backslashes when both sides
7391 // are escaped. Touching them here would
7392 // hide them from that detector.
7393 _ => prepped.push(c),
7394 }
7395 } else {
7396 prepped.push(c);
7397 }
7398 }
7399 let expanded = crate::ported::subst::singsub(&prepped);
7400 let brace_expanded = vec![expanded.to_string()];
7401 // zsh stores the option as `glob` (default ON);
7402 // `setopt noglob` writes `glob=false`. Honor either
7403 // form so the dispatcher behaves the same as zsh.
7404 let noglob = crate::ported::options::opt_state_get("noglob").unwrap_or(false)
7405 || crate::ported::options::opt_state_get("GLOB").map(|v| !v).unwrap_or(false)
7406 || !crate::ported::options::opt_state_get("glob").unwrap_or(true);
7407 let parts: Vec<String> = brace_expanded
7408 .into_iter()
7409 .flat_map(|s| {
7410 // The lexer leaves glob metacharacters in their
7411 // META-encoded form: `*` → `\u{87}`, `?` →
7412 // `\u{86}`, `[` → `\u{91}`, etc. expand_string
7413 // doesn't untokenize them, so the literal-char
7414 // checks below (`s.contains('*')`) would miss
7415 // every real glob and skip expand_glob — that
7416 // bug let `echo *.toml` print the literal
7417 // `*.toml` because the META `\u{87}` never
7418 // matched the literal `*`. Untokenize once so
7419 // the metacharacter checks see the canonical
7420 // form. zsh's pattern.c expects `*` etc. as
7421 // bare chars at the glob layer.
7422 let s = crate::lex::untokenize(&s);
7423 // Skip glob expansion for assignment-shaped
7424 // words (`NAME=value`). zsh doesn't expand the
7425 // RHS of an assignment as a path glob unless
7426 // `setopt globassign` is set, and feeding such
7427 // words through expand_glob makes NOMATCH
7428 // (default ON) fire spuriously on
7429 // `integer i=2*3+1`, `path=*.rs`, etc.
7430 let is_assignment_shape = {
7431 let bytes = s.as_bytes();
7432 let mut i = 0;
7433 if !bytes.is_empty()
7434 && (bytes[0] == b'_' || bytes[0].is_ascii_alphabetic())
7435 {
7436 i += 1;
7437 while i < bytes.len()
7438 && (bytes[i] == b'_' || bytes[i].is_ascii_alphanumeric())
7439 {
7440 i += 1;
7441 }
7442 i < bytes.len() && bytes[i] == b'='
7443 } else {
7444 false
7445 }
7446 };
7447 // Also trigger expand_glob when the word ends
7448 // with a `(...)` qualifier suffix even without
7449 // any other glob metachar — `/etc/hosts(mh-100)`,
7450 // `path(.)`, etc.
7451 let has_qual_suffix =
7452 s.ends_with(')') && s.contains('(') && !s.contains('|');
7453 // extendedglob `^pat` (negation) and `pat~excl`
7454 // (exclusion). Trigger expand_glob so the runtime
7455 // can apply the appropriate filter. Both require
7456 // `setopt extendedglob` — runtime falls through
7457 // to literal if that's off.
7458 let extglob_meta =
7459 crate::ported::options::opt_state_get("extendedglob").unwrap_or(false)
7460 && (s.starts_with('^') || s.contains('~') || s.contains("/^"));
7461 let has_numeric_range = s.contains('<')
7462 && s.contains('>')
7463 && !crate::ported::pattern::extract_numeric_ranges(&s).is_empty();
7464 // Glob alternation `(a|b|c)` is a primary
7465 // zsh feature — `/etc/(passwd|hostname)`
7466 // should expand to file matches. Detected
7467 // by `(` ... `|` ... `)` shape; the actual
7468 // top-level-vs-nested check happens in
7469 // expand_glob_alternation.
7470 let has_alternation = s.contains('(') && s.contains('|') && s.contains(')');
7471 if !noglob
7472 && !is_assignment_shape
7473 && (s.contains('*')
7474 || s.contains('?')
7475 || s.contains('[')
7476 || has_qual_suffix
7477 || extglob_meta
7478 || has_numeric_range
7479 || has_alternation)
7480 {
7481 exec.expand_glob(&s)
7482 } else {
7483 vec![s]
7484 }
7485 })
7486 .collect();
7487 if parts.len() == 1 {
7488 let only = parts.into_iter().next().unwrap_or_default();
7489 // Empty unquoted expansion → drop the arg entirely
7490 // (zsh "remove empty unquoted words" rule). Returning
7491 // an empty Value::Array makes pop_args contribute zero
7492 // items. Direct port of subst.c's empty-elide pass at
7493 // the end of multsub which removes empty linknodes
7494 // from unquoted contexts. Quoted DQ/SQ paths (modes
7495 // 1/2/5) take separate arms above and always emit
7496 // Value::Str so the empty arg survives.
7497 if only.is_empty() {
7498 fusevm::Value::Array(Vec::new())
7499 } else {
7500 fusevm::Value::str(only)
7501 }
7502 } else {
7503 fusevm::Value::Array(parts.into_iter().map(fusevm::Value::str).collect())
7504 }
7505 }
7506 })
7507 });
7508
7509 // `${#name}` — pops [name]. Returns the value's element count for
7510 // arrays (indexed and assoc) or character length for scalars.
7511 vm.register_builtin(BUILTIN_PARAM_LENGTH, |vm, _argc| {
7512 let name_raw = vm.pop().to_str();
7513 // Strip `[@]` / `[*]` subscript suffix — `${#arr[@]}` and
7514 // `${#m[@]}` are element-count forms, same as `${#arr}` /
7515 // `${#m}`. Fast paths sometimes hand us the bare name and
7516 // sometimes leave the subscript attached.
7517 let name = name_raw
7518 .strip_suffix("[@]")
7519 .or_else(|| name_raw.strip_suffix("[*]"))
7520 .unwrap_or(&name_raw)
7521 .to_string();
7522 // `${#arr[N]}` — length of the Nth ELEMENT, not the array
7523 // count. Verified empirically: arr=(aa bb ccc); ${#arr[2]} → 2
7524 // in real zsh. Resolve the bare name + bracketed subscript
7525 // (with embedded `$VAR` references expanded) to a single
7526 // value, then count its chars. Skip `[@]` / `[*]` — those
7527 // were stripped above as splice forms.
7528 if let Some(open) = name.find('[') {
7529 if name.ends_with(']') && &name[open..] != "[@]" && &name[open..] != "[*]" {
7530 let bare = &name[..open];
7531 let raw_idx = &name[open + 1..name.len() - 1];
7532 let elem = with_executor(|exec| {
7533 // Expand `$VAR` / `${VAR}` references inside the
7534 // subscript before lookup (single dollar pass).
7535 let resolved_idx = expand_dollar_refs(raw_idx, exec);
7536 if let Some(arr) = exec.array(bare) {
7537 if let Ok(n) = resolved_idx.trim().parse::<i64>() {
7538 let len = arr.len() as i64;
7539 let idx = if n > 0 { n - 1 } else if n < 0 { len + n } else { -1 };
7540 if idx >= 0 && (idx as usize) < arr.len() {
7541 return arr[idx as usize].clone();
7542 }
7543 }
7544 return String::new();
7545 }
7546 if let Some(map) = exec.assoc(bare) {
7547 return map.get(resolved_idx.as_str()).cloned().unwrap_or_default();
7548 }
7549 String::new()
7550 });
7551 return fusevm::Value::str(elem.chars().count().to_string());
7552 }
7553 }
7554 let count = with_executor(|exec| {
7555 // ${#@} / ${#*} → count of positional params (= $#).
7556 // Without this, `@`/`*` fell through to `get_variable`
7557 // which returned the IFS-joined positional string and
7558 // we counted chars (5 for "a b c" instead of 3).
7559 if name == "@" || name == "*" || name == "argv" {
7560 return exec.pparams().len();
7561 }
7562 // Magic-array specials whose length is data-driven, not
7563 // taken from `exec.arrays`/`exec.assoc_arrays`. Direct
7564 // ports of the relevant `SPECIALPMDEF` entries:
7565 // - `errnos` → Src/Modules/system.c:902
7566 // - `commands` → Src/Modules/parameter.c
7567 // - `aliases` → Src/Modules/parameter.c
7568 // - `functions` → Src/Modules/parameter.c
7569 // - `parameters` → Src/Modules/parameter.c
7570 // - `options` → Src/Modules/parameter.c
7571 // - `sysparams` → Src/Modules/system.c:904
7572 match name.as_str() {
7573 "errnos" => return crate::modules::system::ERRNO_NAMES.len(),
7574 "epochtime" => return 2, // [seconds, nanoseconds]
7575 "commands" => return crate::ported::hashtable::cmdnamtab_lock()
7576 .read().map(|t| t.len()).unwrap_or(0),
7577 "aliases" => return exec.alias_entries().len(),
7578 "galiases" => return exec.global_alias_entries().len(),
7579 "saliases" => return exec.suffix_alias_entries().len(),
7580 "functions" => return exec.function_names().len(),
7581 "options" => return crate::ported::options::opt_state_len(),
7582 "sysparams" => return 3, // pid, ppid, procsubstpid
7583 // Magic-assoc lengths backed by canonical scanners.
7584 // Direct ports of parameter.c SPECIALPMDEF entries —
7585 // each scan callback emits one entry per node, so the
7586 // count is the length of the scan_magic_assoc_keys
7587 // collected list.
7588 "builtins" | "dis_builtins"
7589 | "dis_functions" | "dis_aliases"
7590 | "dis_galiases" | "dis_saliases" => {
7591 return crate::exec::scan_magic_assoc_keys(&name)
7592 .map(|v| v.len())
7593 .unwrap_or(0);
7594 }
7595 _ => {}
7596 }
7597 if let Some(arr) = exec.array(&name) {
7598 arr.len()
7599 } else if let Some(assoc) = exec.assoc(&name) {
7600 assoc.len()
7601 } else {
7602 exec.get_variable(&name).chars().count()
7603 }
7604 });
7605 fusevm::Value::str(count.to_string())
7606 });
7607
// `${var/pat/repl}` / `${var//pat/repl}` / `${var/#pat/repl}` /
// `${var/%pat/repl}` — Pops [name, pattern, replacement, op_byte, dq_flag]
// (dq_flag sits on top of the stack and is popped first).
// op: 0=first, 1=all, 2=anchor-prefix (`/#`), 3=anchor-suffix (`/%`).
7611 vm.register_builtin(BUILTIN_PARAM_REPLACE, |vm, _argc| {
7612 let dq_flag = vm.pop().to_int() != 0;
7613 let op = vm.pop().to_int() as u8;
7614 let repl_raw = vm.pop().to_str();
7615 let pattern_raw = vm.pop().to_str();
7616 let name = vm.pop().to_str();
7617 // SUB_* flag bits set by the (M)/(R)/(B)/(E)/(N)/(S) flag-loop
7618 // arms. Direct port of zsh's getmatch() flag dispatch — these
7619 // alter the disposition of the match result:
7620 // M=0x08 — return matched portion
7621 // R=0x10 — return rest after match
7622 // B=0x20 — return 1-based start index
7623 // E=0x40 — return 1-based end index
7624 // N=0x80 — return match length
7625 // S=0x04 — substring search (anywhere) instead of anchored
7626 // Read once and consume so subsequent paramsubst calls see
7627 // a clean slate — direct port of subst.c flag-loop pattern.
7628 let (sub_match, sub_rest, sub_bind, sub_eind, sub_len, _sub_substr) = {
7629 let f = crate::ported::subst::sub_flags_get();
7630 crate::ported::subst::sub_flags_set(0);
7631 (
7632 (f & 0x0008) != 0, // c:2171 M
7633 (f & 0x0010) != 0, // c:2174 R
7634 (f & 0x0020) != 0, // c:2177 B
7635 (f & 0x0040) != 0, // c:2180 E
7636 (f & 0x0080) != 0, // c:2183 N
7637 (f & 0x0004) != 0, // c:2186 S
7638 )
7639 };
7640 // Both pattern and replacement get parameter / cmd-subst /
7641 // arith expansion before use (zsh semantics — `${s/$pat/X}`
7642 // resolves $pat).
7643 // Untokenize before pattern compile — zsh's lexer leaves
7644 // Snull/DQ markers and meta-encoded metachars in the
7645 // pattern stream. regex::Regex::new errors on those bytes,
7646 // and even when it compiles, it matches against tokenized
7647 // text rather than the user's literal pattern. Direct port
7648 // of bin_test's `untokenize(pattern)` call before patcompile.
7649 let pattern = crate::ported::subst::singsub(&pattern_raw);
7650 let pattern = crate::lex::untokenize(&pattern);
7651 // Replacement: full singsub with skip_filesub so a literal
7652 // leading `~` in the replacement reaches the output as-is
7653 // (per zsh, `${var/#pat/~}` keeps the tilde — the
7654 // p10k / oh-my-zsh idiom of replacing `$HOME` with `~` for
7655 // display). Was using a hand-rolled `expand_no_tilde` that
7656 // only handled `$VAR` / `${VAR}` references, missing
7657 // `$(cmd)` and `$((expr))` in templates like
7658 // `\${var//foo/$(date +%s)}`.
7659 // Inline `singsub-with-skip_filesub` — C zsh sets the flag
7660 // inline before calling singsub rather than wrapping in a
7661 // helper. Direct port of the prefork SUB_FLAG | SKIP_FILESUB
7662 // pattern. PORT.md: no helpers without C counterpart.
7663 let repl = with_executor(|exec| {
7664 let saved = crate::ported::subst::SKIP_FILESUB.with(|c| c.get());
7665 crate::ported::subst::SKIP_FILESUB.with(|c| c.set(true));
7666 let r = crate::ported::subst::singsub(&repl_raw);
7667 crate::ported::subst::SKIP_FILESUB.with(|c| c.set(saved));
7668 if crate::ported::utils::errflag.load(std::sync::atomic::Ordering::Relaxed) != 0 {
7669 exec.set_last_status(1);
7670 }
7671 r
7672 });
7673 let repl = crate::lex::untokenize(&repl);
7674 // Strip backslash escapes from the pattern. zsh: `\X` in a
7675 // ${var/pat/repl} pattern means "literal X" — the backslash
7676 // is removed and X is used as a literal char (regardless of
7677 // whether X is a pattern metachar). Without this, `${a//\:/-}`
7678 // tried to match the literal "\:" in $a which never matched.
7679 // We preserve `\\` (literal backslash) and `\X` for X in the
7680 // pattern-meta set, since regex compile expects those raw.
7681 let pattern = {
7682 let mut out = String::with_capacity(pattern.len());
7683 let mut it = pattern.chars().peekable();
7684 while let Some(c) = it.next() {
7685 if c == '\\' {
7686 if let Some(&nx) = it.peek() {
7687 // For non-meta chars, drop the backslash.
7688 // For metas keep the escape so regex still
7689 // matches them literally below.
7690 // Keep escape only for actual zsh pattern
7691 // metachars (the ones that have special pattern
7692 // meaning). `.` is regex-meta but NOT zsh-meta,
7693 // so `\.` drops the backslash → literal `.`.
7694 if matches!(nx, '?' | '*' | '[' | ']' | '(' | ')' | '|' | '\\') {
7695 out.push(c);
7696 } else {
7697 out.push(nx);
7698 it.next();
7699 }
7700 } else {
7701 out.push(c);
7702 }
7703 } else {
7704 out.push(c);
7705 }
7706 }
7707 out
7708 };
7709 // Inline pattern flags `(#i)` / `(#l)` / `(#I)` / `(#b)` apply
7710 // to ${var//pat/repl}. `(#b)` enables backref capture: each
7711 // `(...)` group in the pattern becomes accessible via
7712 // `${match[N]}` (1-based) in the replacement. Per
7713 // Src/pattern.c — the C source uses `pat_pure` flags +
7714 // `pat_subme` arrays; the Rust port plumbs through
7715 // `regex::Captures` and writes `state.arrays["match"]`
7716 // before each replacement-string expansion.
7717 // Inline glob-flag pre-parse — direct call to patgetglobflags
7718 // + bit-mask extraction (matches C pattern.c:1066+ inline).
7719 let (pattern, case_insensitive_repl, _l_flag_repl, _approx_repl, backref_mode) =
7720 if let Some((bits, _assert, consumed)) =
7721 crate::ported::pattern::patgetglobflags(&pattern)
7722 {
7723 let ci = (bits & crate::ported::zsh_h::GF_IGNCASE) != 0;
7724 let l = (bits & crate::ported::zsh_h::GF_LCMATCHUC) != 0;
7725 let errs = bits & 0xff;
7726 let approx = if errs != 0 { Some(errs as u32) } else { None };
7727 let br = (bits & crate::ported::zsh_h::GF_BACKREF) != 0;
7728 (pattern[consumed..].to_string(), ci, l, approx, br)
7729 } else {
7730 (pattern.clone(), false, false, None, false)
7731 };
7732 // zsh patterns in ${var/pat/repl} support `?`, `*`, `[...]`,
7733 // anchored `#`/`%` (handled via op codes 2/3). Compile to a
7734 // regex for the actual matching; falls back to plain string
7735 // when the pattern has no glob metas (faster).
7736 // Include `(` as a glob trigger — zsh's `(...)` is a grouping
7737 // (with `|` for alternation). `${a/(?)/X}` should match like
7738 // `${a/?/X}` (paren is the group). Without `(` in the trigger
7739 // set, paren patterns fell into the literal-string path and
7740 // matched nothing.
7741 // `#` (and its `##` repetition pair) is an extendedglob
7742 // postfix metachar — `a##` = one-or-more `a`. Include it
7743 // in the trigger set so `${var//a##/X}` routes through the
7744 // regex compile path instead of the literal-string fallback.
7745 // Bare `#` alone is non-meta — but it's safe to over-trigger
7746 // here because the regex compiler escapes literals it can't
7747 // interpret as quantifier postfix anyway.
7748 let has_glob = pattern
7749 .chars()
7750 .any(|c| matches!(c, '?' | '*' | '[' | ']' | '(' | '#'));
7751 // backref_mode (set by `(#b)` / `(#m)` / `(#M)` flags) needs
7752 // per-match capture iteration so `$match[N]` / `$MATCH` /
7753 // `$MBEGIN` / `$MEND` resolve PER-replacement against the
7754 // current capture. The literal-string replace path skips
7755 // captures entirely, so MATCH stays empty. Force the regex
7756 // path when backref_mode is set even for literal patterns.
7757 let glob_re: Option<regex::Regex> = if has_glob || case_insensitive_repl || backref_mode {
7758 // Convert the glob pattern to a regex string:
7759 // ? → . (any single char)
7760 // * → .* (any seq)
7761 // [...] → kept as-is (regex char class)
7762 // ( ) → kept as regex group; | as alternation
7763 // other regex metas → escaped
7764 let mut re = String::with_capacity(pattern.len() * 2);
7765 let mut chars = pattern.chars().peekable();
7766 // `#` / `##` extendedglob postfix detector for the
7767 // BUILTIN_PARAM_REPLACE pattern compile. Matches the
7768 // same handling in subst_port::glob_to_regex_capturing
7769 // and exec.rs::glob_match_static — direct port of zsh's
7770 // pattern.c Pound/POUND2 cases. Used by zinit's
7771 // main-message-formatter pattern `[^\}]##` (one-or-
7772 // more non-`}`).
7773 let consume_postfix = |chars: &mut std::iter::Peekable<std::str::Chars>| -> Option<&'static str> {
7774 if chars.peek() == Some(&'#') {
7775 chars.next();
7776 if chars.peek() == Some(&'#') {
7777 chars.next();
7778 Some("+")
7779 } else {
7780 Some("*")
7781 }
7782 } else {
7783 None
7784 }
7785 };
7786 while let Some(c) = chars.next() {
7787 match c {
7788 '?' => {
7789 re.push('.');
7790 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7791 }
7792 '*' => {
7793 re.push_str(".*");
7794 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7795 }
7796 '[' => {
7797 // Pass through to the closing ']' (already
7798 // valid regex syntax for most char classes).
7799 // zsh uses BOTH `[!...]` and `[^...]` for class
7800 // negation; regex only accepts `^`. Translate
7801 // a leading `!` after `[` to `^`. Track escape
7802 // state so `[\]…]` (escaped `]` inside class)
7803 // doesn't terminate the class on the FIRST `]`.
7804 // Direct port of zsh's pattern.c P_BRACT_END:
7805 // a backslash-quoted `]` inside a class stays
7806 // literal. Used by hist-substring's
7807 // `[\][()|\\*?#<>~^]` pattern.
7808 re.push('[');
7809 if chars.peek() == Some(&'!') {
7810 chars.next();
7811 re.push('^');
7812 }
7813 // First-char `]` is literal in zsh and regex
7814 // (POSIX rule), so allow it without closing.
7815 let mut first = true;
7816 let mut escaped = false;
7817 while let Some(cc) = chars.next() {
7818 if escaped {
7819 re.push(cc);
7820 escaped = false;
7821 first = false;
7822 continue;
7823 }
7824 if cc == '\\' {
7825 re.push(cc);
7826 escaped = true;
7827 continue;
7828 }
7829 if cc == ']' && !first {
7830 re.push(cc);
7831 break;
7832 }
7833 re.push(cc);
7834 first = false;
7835 }
7836 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7837 }
7838 '\\' => {
7839 // `\\(#e)` / `\\(#s)` — escaped backslash
7840 // followed by end/start anchor. After
7841 // expand_string's `\x00\` preprocessing,
7842 // this arrives as `\(#e)` (one backslash
7843 // already consumed as escape-marker). Per
7844 // zsh's pattern.c, `\\` in a pattern is
7845 // escape-backslash (literal `\`). When that
7846 // literal `\` is followed by `(#e)` /
7847 // `(#s)`, emit `\\$` / `\\^`. Detected
7848 // here as 5-char `\(#e)` (one `\` then
7849 // `(#e)` which the (#e) arm below would
7850 // otherwise treat as anchor with a literal
7851 // `(` — losing the backslash). Used by
7852 // zinit's `(#b)((*)\\(#e)|(*))`.
7853 let mut peek = chars.clone();
7854 let p1 = peek.next();
7855 let p2 = peek.next();
7856 let p3 = peek.next();
7857 let p4 = peek.next();
7858 if p1 == Some('(')
7859 && p2 == Some('#')
7860 && (p3 == Some('e') || p3 == Some('s'))
7861 && p4 == Some(')')
7862 {
7863 re.push_str("\\\\");
7864 chars.next(); chars.next(); chars.next(); chars.next();
7865 re.push(if p3 == Some('e') { '$' } else { '^' });
7866 continue;
7867 }
7868 re.push('\\');
7869 if let Some(next) = chars.next() {
7870 re.push(next);
7871 }
7872 }
7873 // `(#e)` / `(#s)` end/start anchors — direct port
7874 // of zsh's pattern.c P_EOL / P_BOL tokens. 4-char
7875 // lookahead detects them; emit regex `$` / `^`.
7876 // Used by zinit's
7877 // `(#b)((*)\\(#e)|(*))` array-replace pattern.
7878 '(' if {
7879 let mut peek = chars.clone();
7880 let p1 = peek.next();
7881 let p2 = peek.next();
7882 let p3 = peek.next();
7883 p1 == Some('#')
7884 && (p2 == Some('e') || p2 == Some('s'))
7885 && p3 == Some(')')
7886 } =>
7887 {
7888 chars.next(); // consume '#'
7889 let kind = chars.next().unwrap(); // 'e' or 's'
7890 chars.next(); // consume ')'
7891 re.push(if kind == 'e' { '$' } else { '^' });
7892 }
7893 // `(`, `|` are zsh group/alternation operators
7894 // — keep them as regex equivalents. `)` may be
7895 // followed by `#`/`##` postfix applied to the
7896 // closed group (e.g. `(foo|bar)##` = one-or-more
7897 // of foo/bar).
7898 '(' | '|' => re.push(c),
7899 ')' => {
7900 re.push(c);
7901 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7902 }
7903 // Regex meta chars that are NOT glob metas — escape
7904 // so the regex compiler treats them literally.
7905 '.' | '+' | '^' | '$' | '{' | '}' => {
7906 re.push('\\');
7907 re.push(c);
7908 }
7909 _ => {
7910 re.push(c);
7911 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7912 }
7913 }
7914 }
7915 // Apply `(#i)` case-insensitive flag if it was present
7916 // in the original pattern. Same `(?i)` prefix as
7917 // glob_match_static uses.
7918 let final_re = if case_insensitive_repl {
7919 format!("(?i){}", re)
7920 } else {
7921 re
7922 };
7923 regex::Regex::new(&final_re).ok()
7924 } else {
7925 None
7926 };
7927 let one = |val: String| -> String {
7928 // SUB_M/R/B/E/N short-circuit — alter the disposition
7929 // before doing the actual replacement. Direct port of
7930 // zsh's getmatch() which returns one of these views
7931 // instead of the substituted string when the bit is set.
7932 // Matched-portion / rest / position / length variants
7933 // all skip the replacement template entirely.
7934 let any_disposition = sub_match || sub_rest || sub_bind || sub_eind || sub_len;
7935 if any_disposition {
7936 if let Some(ref rx) = glob_re {
7937 if let Some(m) = rx.find(&val) {
7938 if sub_match { return m.as_str().to_string(); }
7939 if sub_rest { return val[m.end()..].to_string(); }
7940 if sub_bind { return (m.start() + 1).to_string(); }
7941 if sub_eind { return m.end().to_string(); }
7942 if sub_len { return (m.end() - m.start()).to_string(); }
7943 } else {
7944 // No match: M/R return empty, B/E/N return 0.
7945 if sub_match || sub_rest { return String::new(); }
7946 return "0".to_string();
7947 }
7948 } else if let Some(pos) = val.find(pattern.as_str()) {
7949 let end = pos + pattern.len();
7950 if sub_match { return pattern.clone(); }
7951 if sub_rest { return val[end..].to_string(); }
7952 if sub_bind { return (pos + 1).to_string(); }
7953 if sub_eind { return end.to_string(); }
7954 if sub_len { return pattern.len().to_string(); }
7955 } else {
7956 if sub_match || sub_rest { return String::new(); }
7957 return "0".to_string();
7958 }
7959 }
7960 if let Some(ref rx) = glob_re {
7961 // Helper that runs ONE replacement: takes the
7962 // captures, populates `state.arrays["match"]`
7963 // (1-based indexing), then expands the replacement
7964 // template via `expand_string` so `$match[N]` in
7965 // the template resolves to the just-captured group.
7966 // Mirrors C zsh's pat_subme + addbackref handling
7967 // around Src/pattern.c (pattry, patmatch).
7968 let expand_repl_with_caps = |caps: ®ex::Captures| -> String {
7969 if backref_mode {
7970 with_executor(|exec| {
7971 // `(#b)` — per-group captures into `match[N]`
7972 // (1-based array). Also seed `MATCH` with the
7973 // whole-match text so `(#m)` plus `$MATCH` in
7974 // the replacement returns the matched portion.
7975 // Direct port of Src/pattern.c addbackref +
7976 // pat_pure_m which sets both views.
7977 let mut arr = Vec::with_capacity(caps.len());
7978 let mut begins = Vec::with_capacity(caps.len());
7979 let mut ends = Vec::with_capacity(caps.len());
7980 for i in 1..caps.len() {
7981 if let Some(m) = caps.get(i) {
7982 arr.push(m.as_str().to_string());
7983 begins.push((m.start() + 1).to_string());
7984 ends.push(m.end().to_string());
7985 } else {
7986 arr.push(String::new());
7987 begins.push("0".to_string());
7988 ends.push("0".to_string());
7989 }
7990 }
7991 exec.set_array("match".to_string(), arr);
7992 // mbegin/mend arrays — 1-based start
7993 // and end positions of each capture
7994 // group. Direct port of zsh's
7995 // pat_pure_m population.
7996 exec.set_array("mbegin".to_string(), begins);
7997 exec.set_array("mend".to_string(), ends);
7998 if let Some(m0) = caps.get(0) {
7999 exec.set_scalar("MATCH".to_string(), m0.as_str().to_string());
8000 exec.set_scalar("MBEGIN".to_string(), (m0.start() + 1).to_string());
8001 exec.set_scalar("MEND".to_string(), m0.end().to_string());
8002 }
8003 });
8004 crate::ported::subst::singsub(&repl_raw)
8005 } else {
8006 repl.clone()
8007 }
8008 };
8009 match op {
8010 0 => {
8011 if backref_mode {
8012 // `replacen` doesn't expose Captures —
8013 // reimplement: find first match, expand
8014 // replacement from its caps, splice.
8015 if let Some(caps) = rx.captures(&val) {
8016 let m = caps.get(0).unwrap();
8017 let r = expand_repl_with_caps(&caps);
8018 return format!("{}{}{}", &val[..m.start()], r, &val[m.end()..]);
8019 }
8020 val
8021 } else {
8022 rx.replacen(&val, 1, repl.as_str()).to_string()
8023 }
8024 }
8025 1 => {
8026 if backref_mode {
8027 // Iterate each match, build output piecewise.
8028 let mut out = String::with_capacity(val.len());
8029 let mut last = 0usize;
8030 for caps in rx.captures_iter(&val) {
8031 let m = caps.get(0).unwrap();
8032 out.push_str(&val[last..m.start()]);
8033 let r = expand_repl_with_caps(&caps);
8034 out.push_str(&r);
8035 last = m.end();
8036 }
8037 out.push_str(&val[last..]);
8038 out
8039 } else {
8040 rx.replace_all(&val, repl.as_str()).to_string()
8041 }
8042 }
8043 2 => {
8044 // Anchored prefix: only match at start.
8045 if let Some(caps) = rx.captures(&val) {
8046 let m = caps.get(0).unwrap();
8047 if m.start() == 0 {
8048 let r = if backref_mode {
8049 expand_repl_with_caps(&caps)
8050 } else {
8051 repl.clone()
8052 };
8053 return format!("{}{}", r, &val[m.end()..]);
8054 }
8055 }
8056 val
8057 }
8058 3 => {
8059 // Anchored suffix: last match whose end is val.len().
8060 let mut last_caps: Option<regex::Captures> = None;
8061 for caps in rx.captures_iter(&val) {
8062 let m = caps.get(0).unwrap();
8063 if m.end() == val.len() {
8064 last_caps = Some(caps);
8065 }
8066 }
8067 if let Some(caps) = last_caps {
8068 let m = caps.get(0).unwrap();
8069 let r = if backref_mode {
8070 expand_repl_with_caps(&caps)
8071 } else {
8072 repl.clone()
8073 };
8074 return format!("{}{}", &val[..m.start()], r);
8075 }
8076 val
8077 }
8078 _ => val,
8079 }
8080 } else {
8081 match op {
8082 0 => val.replacen(&pattern, &repl, 1),
8083 1 => val.replace(&pattern, &repl),
8084 2 => {
8085 if val.starts_with(&pattern) {
8086 format!("{}{}", repl, &val[pattern.len()..])
8087 } else {
8088 val
8089 }
8090 }
8091 3 => {
8092 if val.ends_with(&pattern) {
8093 format!("{}{}", &val[..val.len() - pattern.len()], repl)
8094 } else {
8095 val
8096 }
8097 }
8098 _ => val,
8099 }
8100 }
8101 };
8102 // Array case: per-element replacement (default), or
8103 // join-then-replace when in DQ context. zsh: `"${a/o/O}"`
8104 // for `a=(one two three)` joins to "one two three", then
8105 // does the FIRST replacement only -> "One two three".
8106 // Unquoted `${a/o/O}` per-element first -> "One twO three".
8107 let arr_val = with_executor(|exec| exec.array(&name));
8108 if let Some(arr) = arr_val {
8109 if dq_flag {
8110 let joined = arr.join(" ");
8111 return fusevm::Value::str(one(joined));
8112 }
8113 let mapped: Vec<fusevm::Value> = arr
8114 .into_iter()
8115 .map(|s| fusevm::Value::str(one(s)))
8116 .collect();
8117 return fusevm::Value::Array(mapped);
8118 }
8119 let val = with_executor(|exec| exec.get_variable(&name));
8120 fusevm::Value::str(one(val))
8121 });
8122
    // Handler for BUILTIN_REGISTER_COMPILED_FN: installs a function whose
    // body arrives as an already-compiled chunk.
    //
    // Args, in the order `pop_args` returns them (missing trailing args
    // default to the empty string):
    //   1. function name
    //   2. base64 text; decoded and then bincode-deserialized into a
    //      `fusevm::Chunk`
    //   3. raw body source (may be empty)
    //   4. line base as decimal text (anything that fails to parse
    //      becomes 0)
    //
    // Result: `Value::Status(0)` once the function is registered,
    // `Value::Status(1)` when the decoded bytes do not deserialize into a
    // chunk (nothing is registered in that case).
    vm.register_builtin(BUILTIN_REGISTER_COMPILED_FN, |vm, argc| {
        let args = pop_args(vm, argc);
        let mut iter = args.into_iter();
        let name = iter.next().unwrap_or_default();
        let body_b64 = iter.next().unwrap_or_default();
        let body_source = iter.next().unwrap_or_default();
        let line_base_str = iter.next().unwrap_or_default();
        let line_base: i64 = line_base_str.parse().unwrap_or(0);
        let bytes = base64_decode(&body_b64);
        let status = match bincode::deserialize::<fusevm::Chunk>(&bytes) {
            Ok(chunk) => with_executor(|exec| {
                // The defining file is whatever script the executor is
                // currently reading.
                let def_file = exec.scriptfilename.clone();
                // Source text is only recorded when the caller supplied
                // some; line base and defining file are always recorded.
                if !body_source.is_empty() {
                    exec.function_source.insert(name.clone(), body_source.clone());
                }
                exec.function_line_base.insert(name.clone(), line_base);
                exec.function_def_file.insert(name.clone(), def_file);
                // PFA-SMR aspect: every `name() {}` / `function name { }`
                // funnels through here at compile time. Emit one record
                // with the function name + raw body source.
                #[cfg(feature = "recorder")]
                if crate::recorder::is_enabled() {
                    let ctx = exec.recorder_ctx();
                    let body = if body_source.is_empty() {
                        None
                    } else {
                        Some(body_source.as_str())
                    };
                    crate::recorder::emit_function(&name, body, ctx);
                }
                // Mirror into canonical shfunctab so scanfunctions /
                // ${(k)functions} / functions builtin see user defs.
                // C: exec.c:funcdef → shfunctab->addnode(ztrdup(name),shf).
                // If taking the write lock fails, the mirror step is
                // skipped without a diagnostic.
                if let Ok(mut tab) =
                    crate::ported::hashtable::shfunctab_lock().write()
                {
                    let shf = crate::ported::hashtable::shfunc_with_body(
                        &name,
                        &body_source,
                    );
                    tab.add(shf);
                }
                // Last step: hand the chunk itself to the executor under
                // the function's name (this consumes `name`).
                exec.functions_compiled.insert(name, chunk);
                0
            }),
            Err(_) => 1,
        };
        Value::Status(status)
    });
8172
8173 // Wire the ShellHost so direct shell ops (Op::Glob, Op::TildeExpand,
8174 // Op::ExpandParam, Op::CmdSubst, Op::CallFunction, etc.) route through
8175 // ZshrsHost back into the executor.
8176 vm.set_shell_host(Box::new(ZshrsHost));
8177}
8178
8179impl ZshrsHost {
8180 /// True iff `c` can be a `(j:…:)` / `(s:…:)` delimiter — non-alphanumeric,
8181 /// non-underscore. Restricting to punctuation avoids `(jL)` consuming `L`
8182 /// as a delim instead of as the next flag.
8183 fn is_zsh_flag_delim(c: char) -> bool {
8184 !c.is_ascii_alphanumeric() && c != '_'
8185 }
8186}
8187
8188fn expand_dollar_refs(s: &str, exec: &crate::ported::exec::ShellExecutor) -> String {
8189 // Single-pass `$VAR` / `${VAR}` expansion for subscript bodies.
8190 // Mirrors the small subset of paramsubst needed when the BUILTIN_
8191 // PARAM_LENGTH handler resolves `${#arr[$i]}`.
8192 let bytes: Vec<char> = s.chars().collect();
8193 let mut out = String::with_capacity(s.len());
8194 let mut i = 0;
8195 while i < bytes.len() {
8196 if bytes[i] != '$' {
8197 out.push(bytes[i]);
8198 i += 1;
8199 continue;
8200 }
8201 if i + 1 >= bytes.len() {
8202 out.push('$');
8203 i += 1;
8204 continue;
8205 }
8206 let next = bytes[i + 1];
8207 if next == '{' {
8208 if let Some(close) = bytes[i + 2..].iter().position(|&c| c == '}') {
8209 let name: String = bytes[i + 2..i + 2 + close].iter().collect();
8210 out.push_str(&exec.get_variable(&name));
8211 i += 2 + close + 1;
8212 continue;
8213 }
8214 }
8215 if next.is_ascii_alphabetic() || next == '_' {
8216 let start = i + 1;
8217 let mut end = start;
8218 while end < bytes.len() && (bytes[end].is_ascii_alphanumeric() || bytes[end] == '_') {
8219 end += 1;
8220 }
8221 let name: String = bytes[start..end].iter().collect();
8222 out.push_str(&exec.get_variable(&name));
8223 i = end;
8224 continue;
8225 }
8226 out.push('$');
8227 i += 1;
8228 }
8229 out
8230}
8231
8232fn pop_args(vm: &mut fusevm::VM, argc: u8) -> Vec<String> {
8233 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(argc as usize);
8234 for _ in 0..argc {
8235 popped.push(vm.pop());
8236 }
8237 popped.reverse();
8238 let mut args: Vec<String> = Vec::with_capacity(popped.len());
8239 for v in popped {
8240 match v {
8241 fusevm::Value::Array(items) => {
8242 for item in items {
8243 args.push(item.to_str());
8244 }
8245 }
8246 other => args.push(other.to_str()),
8247 }
8248 }
8249 // `expand_glob` set the glob-failed cell when a no-match glob in
8250 // this command's argv triggered the `nomatch` error. For BUILTIN
8251 // commands (zsh: errflag persists in the shell process), the
8252 // entire script aborts with status 1 — `echo /no_match_*` exits
8253 // before printing anything. External commands hit the same flag
8254 // in `host_exec_external` instead, which only fails the command
8255 // and lets the script continue (zsh's fork inherits-but-resets
8256 // errflag semantics). We only land here for builtins, so abort.
8257 let glob_failed = with_executor(|exec| {
8258 let f = exec.current_command_glob_failed.get();
8259 if f {
8260 exec.current_command_glob_failed.set(false);
8261 exec.set_last_status(1);
8262 }
8263 f
8264 });
8265 if glob_failed {
8266 std::process::exit(1);
8267 }
8268 // `$_` tracks the last argument of the PREVIOUSLY executed
8269 // command (zsh / bash convention). Promote the deferred value
8270 // into `$_` BEFORE this command runs (so `echo $_` reads the
8271 // prior command's last arg) then stash THIS command's last arg
8272 // for the next dispatch.
8273 let new_last = args.last().cloned();
8274 with_executor(|exec| {
8275 if let Some(prev) = exec.pending_underscore.take() {
8276 exec.set_scalar("_".to_string(), prev);
8277 }
8278 if let Some(last) = new_last {
8279 exec.pending_underscore = Some(last);
8280 }
8281 });
8282 args
8283}
8284
8285/// zsh dispatch order is alias → function → builtin → external. The
8286/// compiler emits direct CallBuiltin ops for known builtin names for
8287/// perf, which silently skips a user function that shadows the same
8288/// name (e.g. `echo() { ... }; echo hi` would run the C builtin
8289/// without this check). Returns Some(status) when the call is routed
8290/// to the user function; the builtin handler should fall through to
8291/// its native impl when None.
8292fn try_user_fn_override(name: &str, args: &[String]) -> Option<i32> {
8293 let has_fn = with_executor(|exec| {
8294 exec.functions_compiled.contains_key(name) || exec.function_exists(name)
8295 });
8296 if !has_fn {
8297 return None;
8298 }
8299 Some(with_executor(|exec| {
8300 exec.dispatch_function_call(name, args).unwrap_or(127)
8301 }))
8302}
8303
8304// IDs 281 (was BUILTIN_EXPAND_WORD_RUNTIME) and 282 (was
8305// BUILTIN_REGISTER_FUNCTION) were legacy JSON-AST bridges. ZshCompiler
8306// emits BUILTIN_EXPAND_TEXT (314) and BUILTIN_REGISTER_COMPILED_FN
8307// (305) instead. The IDs stay reserved in this gap so future builtins
8308// don't reuse them.
8309
/// Builtin ID for `${name}` reads — routes through `ShellExecutor::get_variable`
/// which knows about special params (`$?`, `$@`, `$#`, `$1..$9`), shell vars
/// (`self.variables`), arrays, and env. Replaces emission of `Op::GetVar` for
/// shell variable names so nested VMs (function calls) see the same storage.
pub const BUILTIN_GET_VAR: u16 = 283;

/// Builtin ID for `name=value` assignments — pops [name, value] and stores
/// into `executor.variables`. Replaces `Op::SetVar` emission for the same
/// reason `BUILTIN_GET_VAR` replaces `Op::GetVar`: the storage must be
/// visible to both bytecode and tree-walker code, across nested VM
/// boundaries.
pub const BUILTIN_SET_VAR: u16 = 284;

/// Builtin ID for pipeline execution. Pops N sub-chunk indices from the stack;
/// each index points into `vm.chunk.sub_chunks` (compiled stage bodies). Forks
/// N children, wires stdin/stdout between them via pipes, runs each stage's
/// bytecode on a fresh VM in its child; the parent waits for all of them and
/// pushes the last stage's exit status. This is bytecode-native pipeline
/// execution — no tree-walker delegation.
pub const BUILTIN_RUN_PIPELINE: u16 = 285;

/// Builtin ID for `Array → String` joining. Pops one value: if it's an Array,
/// joins its string-coerced elements with a single space; otherwise passes
/// it through. Used after `Op::Glob` to convert the pattern's matched paths
/// into the single argv-token form the bytecode word model expects (no
/// per-word splitting yet — that's a future phase).
pub const BUILTIN_ARRAY_JOIN: u16 = 286;

/// Builtin ID for `cmd &` background execution. IDs 287/288/289 are taken by
/// the Phase G1 array builtins (`BUILTIN_SET_ARRAY`, `BUILTIN_SET_ASSOC`,
/// `BUILTIN_ARRAY_INDEX`), so this lands at 290. Pops one sub-chunk index;
/// forks; the child detaches (`setsid`), runs the sub-chunk on a fresh VM and
/// exits with last_status; the parent returns Status(0) immediately.
/// Job-table registration (so `jobs`/`fg`/`wait` can see the pid) is deferred
/// to Phase G6 — fire-and-forget for now.
pub const BUILTIN_RUN_BG: u16 = 290;
8344
/// Indexed-array assignment: `arr=(a b c)`. `compile_simple` emits N element
/// pushes followed by a name push, then `CallBuiltin(BUILTIN_SET_ARRAY, N+1)`.
/// The handler pops args (last popped = name in our pushing order) and stores
/// `Vec<String>` into `executor.arrays`. Tree-walker callers see the same
/// storage. Any prior scalar binding in `executor.variables` for `name` is
/// removed so `${name}` (scalar context) consistently reflects the array's
/// first element via `get_variable`.
pub const BUILTIN_SET_ARRAY: u16 = 287;

/// Single-key set on an associative array: `foo[key]=val`. Stack (top-down):
/// [name, key, value]. Stores `value` into `executor.assoc_arrays[name][key]`,
/// creating the outer entry if missing. `compile_simple` detects `var[...]=...`
/// in assignments and emits this builtin.
pub const BUILTIN_SET_ASSOC: u16 = 288;

/// `${arr[idx]}` — single-element array index. Pops two args:
///   stack: [name, idx_str]
/// Returns the indexed element as Value::str. Indexing semantics: zsh is
/// 1-based by default; bash is 0-based. We follow zsh.
/// Special idx values: `@` and `*` return the whole array as Value::Array
/// (which fuses correctly via the Op::Exec splice for argv splice).
pub const BUILTIN_ARRAY_INDEX: u16 = 289;

/// `${#arr[@]}` and `${#arr}` (when arr is an array name) — array length.
/// Pops one arg: name. Returns Value::str of the length. (ID 290, which sits
/// between this and `BUILTIN_ARRAY_INDEX`, is `BUILTIN_RUN_BG`.)
pub const BUILTIN_ARRAY_LENGTH: u16 = 291;

/// `${arr[@]}` — splice all elements as a Value::Array. Pops one arg: name.
/// The Array gets flattened by Op::Exec/ExecBg/CallFunction into argv.
pub const BUILTIN_ARRAY_ALL: u16 = 292;

/// Flatten one level of Value::Array nesting. Pops N values; for each, if it's
/// a Value::Array, its elements are appended directly; otherwise the value is
/// appended as-is. Pushes a single Value::Array of the flattened result. Used
/// by the for-loop word-list compile path: when a word like `${arr[@]}`
/// produces a nested Array, this lets `for i in ${arr[@]}` iterate over the
/// inner elements rather than the outer single-element array.
pub const BUILTIN_ARRAY_FLATTEN: u16 = 293;
8383
/// `coproc [name] { body }` — bidirectional pipe to async child. Pops a name
/// (optional, "" for default) and a sub-chunk index. Creates two pipes, forks;
/// the child redirects its fd 0/1 to the inner ends and runs the body, the
/// parent stores [write_fd, read_fd] into the named array (default `COPROC`).
/// The caller closes the fds and `wait`s when done. Job-table integration is
/// deferred to Phase G6 alongside the bg `&` work.
pub const BUILTIN_RUN_COPROC: u16 = 294;

/// `arr+=(d e f)` — append N elements to an existing indexed array. Compile
/// emits N element pushes + name push, then
/// `CallBuiltin(BUILTIN_APPEND_ARRAY, N+1)`. Handler drains args (last
/// popped = name), extends `executor.arrays[name]` (creates the entry if
/// missing). Mirrors zsh's `+=` semantics for indexed arrays.
pub const BUILTIN_APPEND_ARRAY: u16 = 295;

/// `select var in words; do body; done` — interactive numbered-menu loop.
/// Compile emits N word pushes + var-name push + sub-chunk index push, then
/// `CallBuiltin(BUILTIN_RUN_SELECT, N+2)`. Handler prints
/// `1) word1\n2) word2\n...` to stderr, prints `$PROMPT3` (default `?# `) to
/// stderr, reads a line from stdin. On EOF returns 0. On a valid 1-based
/// number, sets `var` to the chosen word, runs the sub-chunk, then redisplays
/// the menu and loops. On invalid input redraws the menu without running the
/// body. `break` from inside the body exits the loop (handled by the body's
/// own bytecode).
pub const BUILTIN_RUN_SELECT: u16 = 296;

/// `m[k]+=value` — append onto an existing assoc-array value (string concat).
/// If the key doesn't exist, behaves like SET_ASSOC. Stack: [name, key, value].
/// (ID 297, skipped here, is `BUILTIN_PARAM_FLAG`.)
pub const BUILTIN_APPEND_ASSOC: u16 = 298;

/// `break` from inside a body that runs on a sub-VM (select, future loop-via-
/// builtin constructs). Sets `executor.loop_signal = Some(LoopSignal::Break)`.
/// Outer-loop builtins drain the flag after each body run and exit early.
pub const BUILTIN_SET_BREAK: u16 = 299;

/// `continue` from inside a sub-VM body. Sets the signal to Continue. Outer
/// loop builtins drain it and skip to the next iteration.
pub const BUILTIN_SET_CONTINUE: u16 = 300;

/// Brace expansion: `{a,b,c}` → 3 values, `{1..5}` → 5 values, `{01..05}` →
/// zero-padded numerics, `{a..e}` → letter range. Pops one string, returns
/// Value::Array of expansions (empty array → original string preserved).
pub const BUILTIN_BRACE_EXPAND: u16 = 301;

/// Glob qualifier filter: `*(qualifier)` filters glob results by predicate.
/// Pops [pattern, qualifier_string]. Returns Value::Array of matching paths.
pub const BUILTIN_GLOB_QUALIFIED: u16 = 302;

/// Re-export the regex_match host method as a builtin so `[[ s =~ pat ]]`
/// works even when fusevm's Op::RegexMatch isn't routed (compat fallback).
pub const BUILTIN_REGEX_MATCH: u16 = 303;

/// Word-split a string on IFS (default: whitespace). Pops one string,
/// returns Value::Array of fields. Used in array-literal context where
/// `arr=($(cmd))` should expand cmd's stdout into multiple elements.
pub const BUILTIN_WORD_SPLIT: u16 = 304;
8438
/// Register a pre-compiled fusevm chunk as a function. Stack: [name,
/// base64-bincode-of-Chunk, body_source, line_base] — the handler reads
/// all four, treating a missing trailing value as empty and an unparsable
/// line base as 0. Pushes Status(0) on success, Status(1) when the chunk
/// fails to deserialize. Used by compile_zsh's compile_funcdef to register
/// functions parsed via parse_init+parse without going through the
/// ShellCommand JSON serialization path.
pub const BUILTIN_REGISTER_COMPILED_FN: u16 = 305;
/// NOTE(review): no contract is documented for this ID. The name suggests
/// an "is this variable set?" test — confirm against its handler before
/// relying on that.
pub const BUILTIN_VAR_EXISTS: u16 = 306;
/// Phase 1 native param-modifier builtins. Each takes a fixed argv shape
/// and returns the modified value as Value::Str. Replaces the runtime
/// ShellWord round-trip via BUILTIN_EXPAND_WORD_RUNTIME (the retired ID
/// 281) for the common shapes.
///
/// `${var:-default}` / `${var:=default}` / `${var:?error}` / `${var:+alt}`
/// — pop [name, op_byte, rhs]. op_byte: 0=`:-`, 1=`:=`, 2=`:?`, 3=`:+`.
pub const BUILTIN_PARAM_DEFAULT_FAMILY: u16 = 307;
/// `${var:offset[:length]}` — pop [name, offset, length] (length=-1 means
/// "rest of value"; negative offset counts from end).
pub const BUILTIN_PARAM_SUBSTRING: u16 = 308;
/// `${var#pat}` / `${var##pat}` / `${var%pat}` / `${var%%pat}` — pop
/// [name, pattern, op_byte]. op_byte: 0=`#`, 1=`##`, 2=`%`, 3=`%%`.
pub const BUILTIN_PARAM_STRIP: u16 = 309;
/// `${var/pat/repl}` / `${var//pat/repl}` / `${var/#pat/repl}` /
/// `${var/%pat/repl}` — pop [name, pattern, replacement, op_byte].
/// op_byte: 0=first, 1=all, 2=anchor-prefix, 3=anchor-suffix.
pub const BUILTIN_PARAM_REPLACE: u16 = 310;
/// `${#name}` — character length of a scalar value, or element count
/// of an indexed/assoc array. Pops \[name\], returns count as Value::Str.
pub const BUILTIN_PARAM_LENGTH: u16 = 311;
/// `$((expr))` arithmetic substitution. Pops \[expr_string\], evaluates
/// via the executor's MathEval (integer-aware), returns result as
/// Value::Str. Bypasses ArithCompiler's float-only Op::Div path so
/// `$((10/3))` returns "3" not "3.333...".
pub const BUILTIN_ARITH_EVAL: u16 = 312;
/// `$(cmd)` command substitution. Pops \[cmd_string\], runs through
/// `run_command_substitution` which compiles via parse_init+parse + ZshCompiler
/// and captures stdout via an in-process pipe. Returns trimmed output
/// as Value::Str. Avoids the sub-chunk word-emit quoting bug in the
/// raw Op::CmdSubst path.
pub const BUILTIN_CMD_SUBST_TEXT: u16 = 313;
/// Text-based word expansion. Pops \[preserved_text\]: the word with
/// quotes preserved (Dnull→`"`, Snull→`'`, Bnull→`\`), runs
/// `expand_string` (variable + cmd-sub + arith) then `xpandbraces`
/// then `expand_glob`. Returns Value::str (single match) or
/// Value::Array (multi-match brace/glob).
pub const BUILTIN_EXPAND_TEXT: u16 = 314;
8483
/// `[[ a -ef b ]]` — same-inode test. Stack: [a, b]. Pushes Bool true iff
/// both paths resolve to the same `(dev, inode)` pair (zsh + bash semantics).
pub const BUILTIN_SAME_FILE: u16 = 315;

/// `[[ a -nt b ]]` — file `a` newer than file `b` (mtime strict).
/// Stack: [path_a, path_b]. Pushes Bool. zsh-compatible "missing"
/// rules: if both exist, compare mtime; if only `a` exists → true;
/// otherwise false.
///
/// NOTE(review): 324 is also the value of `BUILTIN_UNKNOWN_COND` further
/// down this file. Presumably only one handler can be bound to an ID —
/// verify which one is actually live for 324 and renumber the other.
pub const BUILTIN_FILE_NEWER: u16 = 324;

/// `[[ a -ot b ]]` — mirror of `-nt`. If both exist, compare mtime;
/// if only `b` exists → true; otherwise false.
///
/// NOTE(review): 325 is also the value of `BUILTIN_IS_TTY` further down
/// this file. Presumably only one handler can be bound to an ID — verify
/// which one is actually live for 325 and renumber the other.
pub const BUILTIN_FILE_OLDER: u16 = 325;

/// `[[ -k path ]]` — sticky bit (S_ISVTX) set on path.
pub const BUILTIN_HAS_STICKY: u16 = 326;
/// `[[ -u path ]]` — setuid bit (S_ISUID).
pub const BUILTIN_HAS_SETUID: u16 = 327;
/// `[[ -g path ]]` — setgid bit (S_ISGID).
pub const BUILTIN_HAS_SETGID: u16 = 328;
/// `[[ -O path ]]` — owned by effective UID.
pub const BUILTIN_OWNED_BY_USER: u16 = 329;
/// `[[ -G path ]]` — owned by effective GID.
pub const BUILTIN_OWNED_BY_GROUP: u16 = 330;
/// `[[ -N path ]]` — file modified since last accessed (atime <= mtime).
/// Numbered 341, out of sequence with its neighbours (330 / 331).
pub const BUILTIN_FILE_MODIFIED_SINCE_ACCESS: u16 = 341;

/// `name+=val` (no parens) — runtime-dispatched append.
///   If name is an indexed array → push val as element.
///   If name is an assoc array → error (zsh requires `(k v)` form).
///   Else → scalar concat (existing SET_VAR behavior).
pub const BUILTIN_APPEND_SCALAR_OR_PUSH: u16 = 331;

/// `[[ -c path ]]` — character device.
pub const BUILTIN_IS_CHARDEV: u16 = 332;
/// `[[ -b path ]]` — block device.
pub const BUILTIN_IS_BLOCKDEV: u16 = 333;
/// `[[ -p path ]]` — FIFO / named pipe.
pub const BUILTIN_IS_FIFO: u16 = 334;
/// `[[ -S path ]]` — socket.
pub const BUILTIN_IS_SOCKET: u16 = 335;
// NOTE(review): the five IDs below (336-340) carry no doc comment and no
// stack contract here; document each one after checking its handler.
pub const BUILTIN_ERREXIT_CHECK: u16 = 336;
pub const BUILTIN_PARAM_SUBSTRING_EXPR: u16 = 337;
pub const BUILTIN_XTRACE_LINE: u16 = 338;
pub const BUILTIN_ARRAY_JOIN_STAR: u16 = 339;
pub const BUILTIN_SET_RAW_OPT: u16 = 340;
8530
/// `time { compound; ... }` — wall-clock-time the sub-chunk and print
/// elapsed seconds. Stack: [sub_chunk_idx as Int]. Runs the sub-chunk
/// on the current VM (so positional/local state is shared) and prints
/// the timing summary to stderr in zsh's format. Pushes Status.
pub const BUILTIN_TIME_SUBLIST: u16 = 316;

/// `{name}>file` / `{name}<file` / `{name}>>file` — named-fd allocation.
/// Stack: [path, varid, op_byte]. Opens `path` per `op_byte`, gets the
/// new fd (≥10 in zsh; we use libc::open with O_CLOEXEC bit cleared so
/// the inherited fd survives Command::new spawns), stores the fd number
/// as a string in `$varid`. Pushes Status (0 success, 1 error).
pub const BUILTIN_OPEN_NAMED_FD: u16 = 317;

/// Word-segment concat that does cartesian-product distribution over
/// arrays. Stack: [lhs, rhs]. Used for RC_EXPAND_PARAM `${arr}` and
/// explicit-distribute forms (`${^arr}`, `${(@)…}`).
///
/// - both scalar: `Value::str(a + b)` (fast path, identical to Op::Concat)
/// - lhs Array, rhs scalar: `Value::Array([a + rhs for a in lhs])`
/// - lhs scalar, rhs Array: `Value::Array([lhs + b for b in rhs])`
/// - both Array: cartesian product `[a + b for a in lhs for b in rhs]`
pub const BUILTIN_CONCAT_DISTRIBUTE: u16 = 318;

/// Forced-distribute concat — like `BUILTIN_CONCAT_DISTRIBUTE` but
/// always distributes cartesian regardless of the `rcexpandparam`
/// option. Emitted by the segments fast-path when an
/// `is_distribute_expansion` segment is present (`${^arr}`,
/// `${(@)arr}`, `${(s.…)arr}` etc.) per zsh: the source-level
/// distribution flag overrides the option default.
/// Direct port of Src/subst.c:1875 `case Hat: nojoin = 1` and the
/// `rcexpandparam` test bypass for the explicit-distribute flags.
pub const BUILTIN_CONCAT_DISTRIBUTE_FORCED: u16 = 522;

/// Capture current `last_status` into the `TRY_BLOCK_ERROR` variable.
/// Emitted between the try block and the always block of `{ … } always
/// { … }` so the finally arm can read $TRY_BLOCK_ERROR.
pub const BUILTIN_SET_TRY_BLOCK_ERROR: u16 = 320;
// NOTE(review): the three IDs below (432-434) carry no doc comment and no
// stack contract here; document each one after checking its handler.
pub const BUILTIN_RESTORE_TRY_BLOCK_STATUS: u16 = 432;
pub const BUILTIN_BEGIN_INLINE_ENV: u16 = 433;
pub const BUILTIN_END_INLINE_ENV: u16 = 434;

/// `[[ -o option ]]` — shell-option-set test. Stack: \[option_name\].
/// Normalizes the name (strip underscores, lowercase) and reads
/// `exec.options`. Pushes Bool.
pub const BUILTIN_OPTION_SET: u16 = 321;

/// `${var:#pattern}` — array filter: remove elements matching `pattern`.
/// Stack: [name, pattern]. For scalar `var`, returns empty if it matches
/// the pattern, else the value. For array `var`, returns Array of
/// non-matching elements.
pub const BUILTIN_PARAM_FILTER: u16 = 322;

/// `a[i]=(elements)` / `a[i,j]=(elements)` / `a[i]=()` —
/// subscripted-array assign with array-literal RHS. Stack:
/// [...elements, name, key]. Empty elements + single-int key `a[i]=()`
/// removes that element. Comma-key `a[i,j]=(...)` splices.
pub const BUILTIN_SET_SUBSCRIPT_RANGE: u16 = 323;

/// `[[ -X file ]]` for unknown unary test op `-X`. Stack: \[op_name\].
/// Emits zsh's `unknown condition: -X` diagnostic to stderr and
/// pushes Bool(false). Without this, unknown conditions silently
/// returned false, matching neither zsh's error format nor the
/// expected status code (zsh returns 2 for parse error).
///
/// NOTE(review): 324 is also the value of `BUILTIN_FILE_NEWER` earlier in
/// this file. Presumably only one handler can be bound to an ID — verify
/// which one is actually live for 324 and renumber the other.
pub const BUILTIN_UNKNOWN_COND: u16 = 324;

/// `[[ -t fd ]]` — fd-is-a-tty check. Stack: \[fd_string\].
/// Routes through libc::isatty. Pushes Bool.
///
/// NOTE(review): 325 is also the value of `BUILTIN_FILE_OLDER` earlier in
/// this file. Presumably only one handler can be bound to an ID — verify
/// which one is actually live for 325 and renumber the other.
pub const BUILTIN_IS_TTY: u16 = 325;
8599
/// Update `$LINENO` to track the source line of the next statement.
/// Stack: \[n\] (the line number from `ZshPipe.lineno`). Direct port
/// of zsh's `lineno` global tracking (Src/input.c:330) — the
/// compiler emits one of these per top-level pipe so `$LINENO`
/// reflects the source position at runtime. ID 342 picked because
/// the previous `326` collided with `BUILTIN_HAS_STICKY`. The file
/// still has other duplicate IDs — 324 (`BUILTIN_FILE_NEWER` /
/// `BUILTIN_UNKNOWN_COND`) and 325 (`BUILTIN_FILE_OLDER` /
/// `BUILTIN_IS_TTY`) — but fixing those is out of scope for this port.
pub const BUILTIN_SET_LINENO: u16 = 342;

/// Pop a scalar from the VM stack, run expand_glob on it, push the
/// result as Value::Array. Used by the segment-concat compile path
/// when var refs concatenate with glob meta literals (`$D/*`,
/// `${prefix}*`, etc.) — those skip the bridge's pathname-expansion
/// pass and would otherwise leak the glob meta to argv as a literal.
pub const BUILTIN_GLOB_EXPAND: u16 = 343;

/// Push a `CmdState` token onto the command-context stack. Direct
/// port of zsh's `cmdpush(int cmdtok)` (Src/prompt.c:1623). The
/// stack is consulted by `%_` in PS4/prompt expansion to produce
/// the cumulative control-flow-context labels (`if`, `then`,
/// `cmdand`, `cmdor`, `cmdsubst`, …) that `zsh -x` xtrace shows
/// in the trace prefix. Compile_zsh emits push/pop pairs around
/// each compound command (if/while/[[…]]/((…))/$(…) etc.).
/// Token is a `CmdState as u8`.
pub const BUILTIN_CMD_PUSH: u16 = 344;

/// Pop the top of the command-context stack. Direct port of zsh's
/// `cmdpop(void)` (Src/prompt.c:1631).
pub const BUILTIN_CMD_POP: u16 = 345;

/// Emit an xtrace line built from the top `argc` values on the VM
/// stack, peeked WITHOUT consuming. Used to trace simple commands
/// AFTER expansion, so `echo for $i` shows as `echo for a` / `echo
/// for b`. Direct port of Src/exec.c:2055-2066.
pub const BUILTIN_XTRACE_ARGS: u16 = 346;

/// Trace one assignment: emits `name=<quoted-value> ` (no newline)
/// to xtrerr if XTRACE is on. Coalesces with subsequent
/// XTRACE_ASSIGN / XTRACE_ARGS calls onto the SAME line via the
/// `XTRACE_DONE_PS4` flag so `a=1 b=2 echo $a $b` produces
/// `<PS4>a=1 b=2 echo 1 2\n`, matching C zsh's `execcmd_exec` body
/// (Src/exec.c:2517-2582):
///
/// ```text
/// xtr = isset(XTRACE);
/// if (xtr) { printprompt4(); doneps4 = 1; }
/// while (assign) {
///     if (xtr) fprintf(xtrerr, "%s=", name);
///     ... eval value ...
///     if (xtr) { quotedzputs(val, xtrerr); fputc(' ', xtrerr); }
/// }
/// ```
///
/// Stack contract on entry: [..., name, value]. Both peeked, NOT
/// consumed (the matching SET_VAR call pops them after). argc = 2.
pub const BUILTIN_XTRACE_ASSIGN: u16 = 525;

/// Emit a trailing `\n` + flush iff XTRACE is on AND PS4 was
/// emitted by an earlier XTRACE_ASSIGN this line. Used at the end
/// of compile_simple's assignment-only path so the trace line gets
/// terminated. Mirrors C's exec.c:3397-3399 (the assign-only return
/// path through execcmd_exec which does `fputc('\n', xtrerr);
/// fflush(xtrerr)`).
///
/// Stack: untouched. argc = 0.
pub const BUILTIN_XTRACE_NEWLINE: u16 = 526;

/// Bridge into subst_port::substitute_brace_array for nested forms
/// that need to PRESERVE array shape across the expand_string
/// boundary. Stack: `[content_string]`. Returns Value::Array of the
/// per-element words. Used by the compile path for
/// `${(@)<nested>...##pat}` shapes — the standard substitute_brace
/// returns String which collapses array→scalar; this builtin
/// preserves the multi-word output via paramsubst's third return
/// (`nodes` vec, the C source's `aval` thread).
pub const BUILTIN_BRIDGE_BRACE_ARRAY: u16 = 347;
8674
/// Word-segment concat with FIRST/LAST sticking. Stack: `[lhs, rhs]`.
/// Used for default unquoted splice forms (`${arr[@]}`, `$@`, `$*`)
/// where the prefix sticks to the first element only and the suffix to
/// the last element only.
///
/// Distribution table:
/// - both scalar: `Value::str(a + b)` (fast path)
/// - lhs scalar, rhs Array(b₀..bₙ): `Value::Array([lhs+b₀, b₁, …, bₙ])`
/// - lhs Array(a₀..aₙ), rhs scalar: `Value::Array([a₀, …, aₙ₋₁, aₙ+rhs])`
/// - both Array: `Value::Array([a₀, …, aₙ₋₁, aₙ+b₀, b₁, …, bₙ])`
///   (last of lhs merges with first of rhs; the rest stay separate)
///
/// This is the default zsh semantics for `print -l X${arr[@]}Y` →
/// `"Xa"`, `"b"`, `"cY"` — three distinct args, surrounding text only
/// on the ends.
pub const BUILTIN_CONCAT_SPLICE: u16 = 319;
8689
/// `${(flags)name}` — zsh parameter expansion flags. Stack: `[name, flags]`.
/// Flags are applied left-to-right. Supported subset (high-value, used by
/// zpwr):
///
/// - `L` — lowercase the value (scalar; or each element if array)
/// - `U` — uppercase
/// - `j:sep:` — join array with `sep` (delim is the char after `j`)
/// - `s:sep:` — split scalar on `sep` (returns `Value::Array`)
/// - `f` — split on newlines (shorthand for `s.\n.`)
/// - `o` — sort array ascending
/// - `O` — sort array descending
/// - `P` — indirect: read name's value as another var name, return that
///   variable's value
/// - `@` — keep as array (returns `Value::Array` — useful before `j` etc.)
/// - `k` — keys of assoc array
/// - `v` — values of assoc array
/// - `#` — word count (array length as scalar)
///
/// Flags can stack: `(jL)` joins then lowercases; `(s.,.U)` splits on `,`
/// then uppercases each element. The long-tail flags (`q`, `qq`, `qqq` for
/// quoting, `A` for assoc, `%` for prompt expansion, `e`/`g` for re-eval,
/// `n`/`p` for numeric, `t` for type, etc.) are deferred — they hit the
/// runtime fallback via the catch-all expansion path.
pub const BUILTIN_PARAM_FLAG: u16 = 297;
8712
/// `ShellHost` implementation that delegates to the current `ShellExecutor`
/// via the `with_executor` thread-local: the fusevm-host implementation
/// tying bytecode ops to the shell executor.
///
/// Construct fresh on each VM run (it carries no state itself). The VM
/// dispatches host method calls during `vm.run()`, and `with_executor`
/// resolves to the executor pointer set by `ExecutorContext::enter`.
///
/// zshrs-original — no C counterpart. C zsh has no bytecode VM
/// to host; everything runs through `execlist()`/`execpline()`
/// directly (Src/exec.c lines 1349/1668).
pub struct ZshrsHost;
8725
8726impl fusevm::ShellHost for ZshrsHost {
8727 fn glob(&mut self, pattern: &str, _recursive: bool) -> Vec<String> {
8728 with_executor(|exec| exec.expand_glob(pattern))
8729 }
8730
8731 fn tilde_expand(&mut self, s: &str) -> String {
8732 with_executor(|exec| s.to_string())
8733 }
8734
8735 fn brace_expand(&mut self, s: &str) -> Vec<String> {
8736 // Direct call to the canonical brace expander
8737 // (Src/glob.c::xpandbraces port at glob.rs:1678). Was
8738 // routing through singsub which uses PREFORK_SINGLE — that
8739 // flag explicitly suppresses brace expansion in subst.c:166,
8740 // so `print X{1,2,3}Y` returned the literal string.
8741 //
8742 // brace_ccl: respect the BRACE_CCL option which the bracket-
8743 // class form `{a-z}` requires. Pull from executor options.
8744 let brace_ccl = with_executor(|exec|
8745 crate::ported::options::opt_state_get("braceccl").unwrap_or(false));
8746 crate::ported::glob::xpandbraces(s, brace_ccl)
8747 }
8748
8749 fn str_match(&mut self, s: &str, pattern: &str) -> bool {
8750 // Shell glob match — `*`, `?`, `[...]`, alternation. Used by `[[ x = pat ]]`,
8751 // `case` arms, and any other point that compares against a glob pattern.
8752 crate::exec::glob_match_static(s, pattern)
8753 }
8754
8755 fn expand_param(&mut self, name: &str, _modifier: u8, _args: &[fusevm::Value]) -> fusevm::Value {
8756 // Sole funnel: route through `getsparam` matching C zsh's
8757 // `getsparam(name)` → `getvalue` → `getstrvalue` →
8758 // `Param.gsu->getfn` dispatch (Src/params.c:3076 / 2335).
8759 //
8760 // The lookup chain (GSU dispatch + variables + env + array-
8761 // join) lives in `params::getsparam`; subst.rs and this
8762 // bridge both call into it so the logic is in exactly one
8763 // place — mirroring C's "every read goes through getsparam"
8764 // architecture. fuseVM bytecode triggers this bridge when
8765 // the VM hits a PARAM opcode, equivalent to C's tree-walker
8766 // hitting a `${...}` AST node.
8767 //
8768 // Modifier handling: the `_modifier` / `_args` parameters
8769 // are populated by the bytecode compiler but applied by
8770 // separate VM opcodes (LENGTH/STRIP/SUBST/etc.) downstream
8771 // of this fetch — matching C's split between getsparam
8772 // (value fetch) and paramsubst's modifier-walk loop. This
8773 // bridge is the value-fetch step only.
8774 let val_str = crate::ported::params::getsparam(name)
8775 .unwrap_or_default();
8776 fusevm::Value::str(val_str)
8777 }
8778
8779 fn regex_match(&mut self, s: &str, regex: &str) -> bool {
8780 // Untokenize the pattern + subject before compiling. zsh's
8781 // lexer emits Snull/DQ markers around quoted regions; if a
8782 // single-quoted regex like `'([a-z]+)([0-9]+)'` reaches us
8783 // with the Snull bytes still present, regex::Regex::new
8784 // returns Err (the markers aren't valid pattern syntax).
8785 // Direct port of zsh's bin_test path which calls untokenize()
8786 // on both operands before handing to the regex compiler
8787 // (Src/cond.c:cond_match).
8788 let regex = crate::lex::untokenize(regex);
8789 let s = crate::lex::untokenize(s);
8790 let s = s.as_str();
8791 let regex = regex.as_str();
8792 // Compile (cached) and run captures so we can populate the
8793 // zsh-side magic vars: `$MATCH` (full match), `$match[N]`
8794 // (capture groups), and `$mbegin`/`$mend` (1-based offsets).
8795 let mut cache = REGEX_CACHE.lock();
8796 let re = if let Some(re) = cache.get(regex) {
8797 re.clone()
8798 } else {
8799 match regex::Regex::new(regex) {
8800 Ok(re) => {
8801 cache.insert(regex.to_string(), re.clone());
8802 re
8803 }
8804 Err(_) => return false,
8805 }
8806 };
8807 drop(cache);
8808 match re.captures(s) {
8809 Some(caps) => {
8810 let full = caps
8811 .get(0)
8812 .map(|m| m.as_str().to_string())
8813 .unwrap_or_default();
8814 let full_begin = caps
8815 .get(0)
8816 .map(|m| (s[..m.start()].chars().count() + 1).to_string())
8817 .unwrap_or_else(|| "0".to_string());
8818 let full_end = caps
8819 .get(0)
8820 .map(|m| s[..m.end()].chars().count().to_string())
8821 .unwrap_or_else(|| "0".to_string());
8822 let mut group_strs: Vec<String> = Vec::new();
8823 let mut begins: Vec<String> = Vec::new();
8824 let mut ends: Vec<String> = Vec::new();
8825 for i in 1..caps.len() {
8826 if let Some(m) = caps.get(i) {
8827 group_strs.push(m.as_str().to_string());
8828 begins.push((s[..m.start()].chars().count() + 1).to_string());
8829 ends.push(s[..m.end()].chars().count().to_string());
8830 } else {
8831 group_strs.push(String::new());
8832 begins.push("0".to_string());
8833 ends.push("0".to_string());
8834 }
8835 }
8836 with_executor(|exec| {
8837 exec.set_scalar("MATCH".to_string(), full);
8838 exec.set_scalar("MBEGIN".to_string(), full_begin);
8839 exec.set_scalar("MEND".to_string(), full_end);
8840 exec.set_array("match".to_string(), group_strs);
8841 exec.set_array("mbegin".to_string(), begins);
8842 exec.set_array("mend".to_string(), ends);
8843 });
8844 true
8845 }
8846 None => false,
8847 }
8848 }
8849
8850 fn process_sub_in(&mut self, sub: &fusevm::Chunk) -> String {
8851 // Run the sub-chunk synchronously (in the current executor context),
8852 // capture stdout into a temp file, return the path. Synchronous is
8853 // simpler and avoids the thread-local-executor limitation that
8854 // spawned threads can't see. Common consumers (`diff`, `cat`,
8855 // `comm`) read the file once anyway.
8856 let fifo_path = format!(
8857 "/tmp/zshrs_psub_{}_{}",
8858 std::process::id(),
8859 with_executor(|e| {
8860 let n = e.process_sub_counter;
8861 e.process_sub_counter += 1;
8862 n
8863 })
8864 );
8865 let _ = std::fs::remove_file(&fifo_path);
8866 let f = match std::fs::File::create(&fifo_path) {
8867 Ok(f) => f,
8868 Err(_) => return fifo_path,
8869 };
8870 let saved = unsafe { libc::dup(libc::STDOUT_FILENO) };
8871 unsafe {
8872 libc::dup2(f.as_raw_fd(), libc::STDOUT_FILENO);
8873 }
8874 let mut vm = fusevm::VM::new(sub.clone());
8875 register_builtins(&mut vm);
8876 vm.set_shell_host(Box::new(ZshrsHost));
8877 let _ = vm.run();
8878 let _ = std::io::stdout().flush();
8879 unsafe {
8880 libc::dup2(saved, libc::STDOUT_FILENO);
8881 libc::close(saved);
8882 }
8883 fifo_path
8884 }
8885
8886 fn process_sub_out(&mut self, sub: &fusevm::Chunk) -> String {
8887 // `>(cmd)` — consumer reads stdin from a FIFO that the parent
8888 // writes to. Create a real named pipe, fork a child that
8889 // dup2s the read end onto stdin and runs the sub-chunk; return
8890 // the FIFO path to the parent so it writes there.
8891 let fifo_path = format!(
8892 "/tmp/zshrs_psub_out_{}_{}",
8893 std::process::id(),
8894 with_executor(|e| {
8895 let n = e.process_sub_counter;
8896 e.process_sub_counter += 1;
8897 n
8898 })
8899 );
8900 let _ = std::fs::remove_file(&fifo_path);
8901 let cpath = match CString::new(fifo_path.clone()) {
8902 Ok(c) => c,
8903 Err(_) => return fifo_path,
8904 };
8905 if unsafe { libc::mkfifo(cpath.as_ptr(), 0o600) } != 0 {
8906 // Fall back to plain file if mkfifo fails.
8907 let _ = std::fs::write(&fifo_path, "");
8908 return fifo_path;
8909 }
8910 let sub = sub.clone();
8911 let fifo_for_child = fifo_path.clone();
8912 match unsafe { libc::fork() } {
8913 -1 => {
8914 let _ = std::fs::remove_file(&fifo_path);
8915 }
8916 0 => {
8917 // Child: open FIFO for read, dup onto stdin, run sub-chunk, exit.
8918 if let Ok(f) = std::fs::OpenOptions::new().read(true).open(&fifo_for_child) {
8919 let fd = f.as_raw_fd();
8920 unsafe {
8921 libc::dup2(fd, libc::STDIN_FILENO);
8922 }
8923 }
8924 let mut vm = fusevm::VM::new(sub);
8925 register_builtins(&mut vm);
8926 vm.set_shell_host(Box::new(ZshrsHost));
8927 let _ = vm.run();
8928 unsafe { libc::_exit(0) };
8929 }
8930 _ => {
8931 // Parent — return path; child handles cleanup of the FIFO
8932 // once stdin EOFs. (The path may leak if the parent never
8933 // writes; acceptable for common `>(cmd)` idioms.)
8934 }
8935 }
8936 fifo_path
8937 }
8938
8939 fn subshell_begin(&mut self) {
8940 with_executor(|exec| {
8941 // libc::umask returns the previous mask AND sets the new
8942 // one; call with current value to read without changing.
8943 let cur_umask = unsafe {
8944 let m = libc::umask(0o022);
8945 libc::umask(m);
8946 m as u32
8947 };
8948 // Snapshot paramtab + hashed-storage too (step 1 of the
8949 // store unification mirrors writes there; restoring only
8950 // the HashMaps leaks subshell-scoped writes to the parent
8951 // via paramtab readers like `paramsubst → vars_get`).
8952 let paramtab_snap = crate::ported::params::paramtab().read().ok()
8953 .map(|t| t.clone())
8954 .unwrap_or_default();
8955 let paramtab_hashed_snap = crate::ported::params::paramtab_hashed_storage()
8956 .lock().ok()
8957 .map(|m| m.clone())
8958 .unwrap_or_default();
8959 exec.subshell_snapshots.push(SubshellSnapshot {
8960 paramtab: paramtab_snap,
8961 paramtab_hashed_storage: paramtab_hashed_snap,
8962 positional_params: exec.pparams(),
8963 env_vars: std::env::vars().collect(),
8964 // Save the LOGICAL pwd ($PWD env), not `current_dir()`'s
8965 // symlink-resolved path. zsh's subshell isolation per
8966 // Src/exec.c at the `entersubsh` path treats `pwd` (the
8967 // shell-tracked logical PWD) as the carrier — see
8968 // `Src/builtin.c:1239-1242` where cd writes the logical
8969 // dest into `pwd`. Falling back to current_dir() only
8970 // when PWD is unset matches `setupvals` at
8971 // `Src/init.c:1100+`.
8972 cwd: std::env::var("PWD").ok()
8973 .map(std::path::PathBuf::from)
8974 .or_else(|| std::env::current_dir().ok()),
8975 umask: cur_umask,
8976 traps: exec.traps.clone(),
8977 });
8978 // Subshell starts with EXIT trap cleared so the parent's
8979 // EXIT handler doesn't fire when the subshell ends. zsh:
8980 // each subshell has its own trap context. Other signals
8981 // are inherited (well, parent's are still in place — but
8982 // a trap set INSIDE the subshell shouldn't leak out).
8983 exec.traps.remove("EXIT");
8984 let level = exec
8985 .scalar("ZSH_SUBSHELL")
8986 .and_then(|s| s.parse::<i32>().ok())
8987 .unwrap_or(0);
8988 exec.set_scalar("ZSH_SUBSHELL".to_string(), (level + 1).to_string());
8989 });
8990 }
8991
8992 fn subshell_end(&mut self) {
8993 // Fire subshell's EXIT trap BEFORE restoring parent state so
8994 // the trap body sees the subshell's vars and exit status. zsh
8995 // forks for `(...)` so the trap runs in the child process,
8996 // before exit. We mirror by running it here, just before the
8997 // pop+restore. REMOVE the trap before firing so the inner
8998 // execute_script doesn't fire it again at its own end.
8999 let exit_trap_body = with_executor(|exec| exec.traps.remove("EXIT"));
9000 if let Some(body) = exit_trap_body {
9001 // Execute the trap body. Errors during trap execution
9002 // don't bubble — zsh ignores trap-body errors.
9003 with_executor(|exec| {
9004 let _ = exec.execute_script(&body);
9005 });
9006 }
9007 with_executor(|exec| {
9008 if let Some(snap) = exec.subshell_snapshots.pop() {
9009 // Restore paramtab + hashed storage so subshell-scoped
9010 // writes via setsparam/setaparam/sethparam don't leak
9011 // to the parent via paramtab readers.
9012 if let Some(tab) = crate::ported::params::paramtab().write().ok().as_deref_mut() {
9013 *tab = snap.paramtab;
9014 }
9015 if let Some(m) = crate::ported::params::paramtab_hashed_storage()
9016 .lock().ok().as_deref_mut() {
9017 *m = snap.paramtab_hashed_storage;
9018 }
9019 exec.set_pparams(snap.positional_params);
9020 // Restore the OS env to its pre-subshell state.
9021 // Removes any `export` writes the subshell made, and
9022 // restores any vars the subshell unset. Without this
9023 // `(export y=sub)` would leak `y` to the parent shell.
9024 let current: HashMap<String, String> = std::env::vars().collect();
9025 for k in current.keys() {
9026 if !snap.env_vars.contains_key(k) {
9027 std::env::remove_var(k);
9028 }
9029 }
9030 for (k, v) in &snap.env_vars {
9031 if current.get(k) != Some(v) {
9032 std::env::set_var(k, v);
9033 }
9034 }
9035 if let Some(cwd) = snap.cwd {
9036 let _ = std::env::set_current_dir(&cwd);
9037 // Resync $PWD env so a parent `pwd` doesn't read
9038 // the cwd the subshell `cd`'d into.
9039 std::env::set_var("PWD", &cwd);
9040 }
9041 // Restore umask. zsh's `(umask 077)` doesn't leak to
9042 // parent because the subshell forks; we run in-process
9043 // so we manually reset.
9044 unsafe {
9045 libc::umask(snap.umask as libc::mode_t);
9046 }
9047 // Restore parent's traps (the subshell's own traps die
9048 // with it). zsh: `(trap "X" USR1)` doesn't leak the
9049 // USR1 trap out of the subshell.
9050 exec.traps = snap.traps;
9051 }
9052 });
9053 }
9054
9055 fn redirect(&mut self, fd: u8, op: u8, target: &str) {
9056 // Apply a redirection at the OS level for the next command/builtin.
9057 // The host tracks saved fds in a per-executor stack so a future
9058 // `with_redirects_end` can restore. For now, this is a thin wrapper
9059 // that performs the dup2; pairing with explicit save/restore is
9060 // delivered by `with_redirects_begin/end`.
9061 with_executor(|exec| exec.host_apply_redirect(fd, op, target));
9062 }
9063
9064 fn with_redirects_begin(&mut self, count: u8) {
9065 with_executor(|exec| exec.host_redirect_scope_begin(count));
9066 }
9067
9068 fn with_redirects_end(&mut self) {
9069 with_executor(|exec| exec.host_redirect_scope_end());
9070 }
9071
9072 fn heredoc(&mut self, content: &str) {
9073 // C `Src/exec.c:4641` — `parsestr(&buf)` runs parameter +
9074 // command substitution on the heredoc body. The lexer's
9075 // quoted-delimiter detection (`<<'EOF'`) routes through the
9076 // `Op::HereDoc` path in `compile_zsh.rs` which short-circuits
9077 // before reaching here; unquoted forms route through the
9078 // BUILTIN_EXPAND_TEXT mode-4 emit path that calls singsub.
9079 // This handler covers the verbatim/quoted case.
9080 with_executor(|exec| exec.host_set_pending_stdin(content.to_string()));
9081 }
9082
9083 fn herestring(&mut self, content: &str) {
9084 // Shell semantics: herestring appends a newline. `<<<` body
9085 // substitution (`Src/exec.c:4655 getherestr` calls
9086 // `quotesubst` + `untokenize`) lands here verbatim; the
9087 // upstream compiler routes through `Op::HereString` after
9088 // BUILTIN_EXPAND_TEXT for the substitution pass, so callers
9089 // of `host.herestring` see the already-expanded form.
9090 let mut s = content.to_string();
9091 s.push('\n');
9092 with_executor(|exec| exec.host_set_pending_stdin(s));
9093 }
9094
9095 fn exec(&mut self, args: Vec<String>) -> i32 {
9096 // Track `$_` as the last argument of the last command (zsh /
9097 // bash convention). Empty arglists leave it untouched.
9098 if let Some(last) = args.last() {
9099 with_executor(|exec| {
9100 exec.set_scalar("_".to_string(), last.clone());
9101 });
9102 }
9103 // Route external command spawning through `executor.execute_external`
9104 // so intercepts (AOP before/after/around), command_hash lookups,
9105 // pre/postexec hooks, and zsh-specific fork-then-exec all apply.
9106 // Without this override, fusevm's default `host.exec` calls
9107 // `Command::new` directly, bypassing zshrs's dispatch logic.
9108 with_executor(|exec| exec.host_exec_external(&args))
9109 }
9110
9111 fn cmd_subst(&mut self, sub: &fusevm::Chunk) -> String {
9112 // Run the sub-chunk on a nested VM with the same host wired up,
9113 // capturing stdout. The current executor remains active via the
9114 // thread-local — the nested VM uses CallBuiltin to dispatch shell
9115 // ops back through `with_executor`.
9116 let (read_end, write_end) = match os_pipe::pipe() {
9117 Ok(p) => p,
9118 Err(_) => return String::new(),
9119 };
9120 let saved_stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
9121 if saved_stdout < 0 {
9122 return String::new();
9123 }
9124 let write_fd = std::os::unix::io::AsRawFd::as_raw_fd(&write_end);
9125 unsafe {
9126 libc::dup2(write_fd, libc::STDOUT_FILENO);
9127 }
9128 drop(write_end);
9129
9130 let mut vm = fusevm::VM::new(sub.clone());
9131 register_builtins(&mut vm);
9132 vm.set_shell_host(Box::new(ZshrsHost));
9133 let _ = vm.run();
9134 let cmd_status = vm.last_status;
9135
9136 unsafe {
9137 libc::dup2(saved_stdout, libc::STDOUT_FILENO);
9138 libc::close(saved_stdout);
9139 }
9140
9141 // Inner cmd's status not propagated for the same reason as
9142 // run_command_substitution — see GAPS.md.
9143 let _ = cmd_status;
9144
9145 let mut buf = String::new();
9146 let mut reader = read_end;
9147 let _ = reader.read_to_string(&mut buf);
9148 // Strip trailing newlines (POSIX command substitution semantics)
9149 while buf.ends_with('\n') {
9150 buf.pop();
9151 }
9152 buf
9153 }
9154
9155 fn call_function(&mut self, name: &str, args: Vec<String>) -> Option<i32> {
9156 // zsh-bundled rename helpers + zcalc: short-circuit BEFORE the
9157 // function/autoload lookup so the autoloaded zsh source (which
9158 // can hang zshrs's parser on zsh-specific syntax) never runs.
9159 // Native Rust impls live in builtin_zmv / builtin_zcalc.
9160 match name {
9161 "zmv" => {
9162 return Some(crate::extensions::ext_builtins::zmv(&args, "mv"));
9163 }
9164 "zcp" => {
9165 return Some(crate::extensions::ext_builtins::zmv(&args, "cp"));
9166 }
9167 "zln" => {
9168 return Some(crate::extensions::ext_builtins::zmv(&args, "ln"));
9169 }
9170 "zcalc" => {
9171 return Some(crate::extensions::ext_builtins::zcalc(&args));
9172 }
9173 // Daemon-managed z* builtins — thin IPC wrappers. Short-circuit BEFORE
9174 // the function-lookup path so a missing daemon doesn't fall through to
9175 // "command not found". The name list is owned by the daemon crate
9176 // (zshrs_daemon::builtins::ZSHRS_BUILTIN_NAMES); routing through
9177 // try_dispatch keeps this site zero-touch as new z* builtins land.
9178 n if crate::daemon::builtins::is_zshrs_builtin(n) => {
9179 let argv: Vec<String> = std::iter::once(name.to_string()).chain(args).collect();
9180 return Some(crate::daemon::builtins::try_dispatch(n, &argv).unwrap_or(1));
9181 }
9182 _ => {}
9183 }
9184
9185 // Alias check first: `alias g='echo hi'; g` rewrites to `echo hi`
9186 // before normal function/external dispatch. The expansion is
9187 // re-parsed + compiled + run on a nested VM with `args` appended.
9188 // Without this branch, aliases would be silently ignored at
9189 // run-time and `g` would fall through to "command not found".
9190 // Skip when this alias is mid-expansion already — zsh's lexer
9191 // disables an alias inside its own body (so `alias ls='ls -la'`
9192 // works without recursion). We do the same via a HashSet guard
9193 // since we expand at run time, not parse time.
9194 // C uses the `alias.inuse` field on the alias node itself
9195 // (`Src/zsh.h:1256` `struct alias { ... int inuse; }`) — the
9196 // lexer bumps it before splicing the body and clears it after,
9197 // so a recursive use within the body sees `inuse != 0` and
9198 // refuses to re-expand. Mirror that here against the canonical
9199 // `aliastab` instead of a side HashSet on ShellExecutor.
9200 let already_expanding = crate::ported::hashtable::aliastab_lock()
9201 .read()
9202 .ok()
9203 .and_then(|tab| tab.get(name).map(|a| a.inuse != 0))
9204 .unwrap_or(false);
9205 let alias_body = if already_expanding {
9206 None
9207 } else {
9208 with_executor(|exec| exec.alias(name))
9209 };
9210 if let Some(body) = alias_body {
9211 let combined = if args.is_empty() {
9212 body
9213 } else {
9214 let quoted: Vec<String> = args
9215 .iter()
9216 .map(|a| {
9217 let escaped = a.replace('\'', "'\\''");
9218 format!("'{}'", escaped)
9219 })
9220 .collect();
9221 format!("{} {}", body, quoted.join(" "))
9222 };
9223 // Bump inuse → run → clear, matching C's lexer behavior.
9224 if let Ok(mut tab) = crate::ported::hashtable::aliastab_lock().write() {
9225 if let Some(a) = tab.get_mut(name) { a.inuse += 1; }
9226 }
9227 let status = with_executor(|exec| exec.execute_script(&combined).unwrap_or(1));
9228 if let Ok(mut tab) = crate::ported::hashtable::aliastab_lock().write() {
9229 if let Some(a) = tab.get_mut(name) { a.inuse = (a.inuse - 1).max(0); }
9230 }
9231 return Some(status);
9232 }
9233
9234 // Resolve to a compiled Chunk:
9235 // 1. Already in functions_compiled → use as-is
9236 // 2. AST-only (sourced / defined earlier) → compile on demand
9237 // 3. Pending autoload → trigger autoload, then retry the AST path
9238 // 4. Available via fpath ZWC scan → autoload via that, then AST path
9239 // 5. Not a function → None so fusevm falls back to host.exec
9240 let chunk = with_executor(|exec| {
9241 // Autoload pending: the legacy stub in self.functions makes
9242 // maybe_autoload / autoload_function were deleted with
9243 // the old exec.c stubs (they were return-false / no-op).
9244 // The autoload dispatch needs a proper port of
9245 // `Src/builtin.c:bin_autoload` + `Src/exec.c:loadautofn`.
9246 // Until that lands, skip the autoload trigger — the eager
9247 // fpath scan below covers the common interactive case.
9248 if let Some(c) = exec.functions_compiled.get(name) {
9249 return Some(c.clone());
9250 }
9251 exec.functions_compiled.get(name).cloned()
9252 });
9253
9254 let chunk = chunk?;
9255
9256 // FUNCNEST recursion guard. zsh enforces a max depth
9257 // (default 500) — past that the call is refused with
9258 // `<name>: maximum nested function level reached; increase
9259 // FUNCNEST?` and exit 1. Without this, `foo() { foo; }; foo`
9260 // overflowed the Rust stack instead of erroring gracefully.
9261 // zshrs's effective ceiling is lower than zsh's: each
9262 // `call_function` recursion consumes ~40KB of Rust stack
9263 // (the bytecode VM is recursive at the host level), so the
9264 // 8MB default stack tops out around ~150 frames. Cap at 100
9265 // by default — users with deeper need can raise FUNCNEST
9266 // explicitly AND run with a larger stack (RUST_MIN_STACK).
9267 let funcnest_limit = with_executor(|exec| {
9268 exec.scalar("FUNCNEST")
9269 .and_then(|s| s.parse::<usize>().ok())
9270 .unwrap_or(100)
9271 });
9272 let cur_depth = with_executor(|exec| exec.local_scope_depth);
9273 if cur_depth >= funcnest_limit {
9274 eprintln!(
9275 "{}: maximum nested function level reached; increase FUNCNEST?",
9276 name
9277 );
9278 return Some(1);
9279 }
9280
9281 // Save and replace positional params, mirror local-scope save/restore
9282 // from the tree-walker `call_function`. The thread-local executor
9283 // pointer set by the outer VM remains valid for the nested VM —
9284 // nested CallBuiltin handlers and host callbacks all see the same
9285 // executor.
9286 let fn_name = name.to_string();
9287 // Snapshot options at function entry. zsh restores these on
9288 // exit when `local_options` is set at that time (per zshmisc
9289 // LOCAL_OPTIONS — `setopt local_options` and `emulate -L
9290 // ...` both arm the restore). Without this, a function that
9291 // does `setopt no_glob` to scope an option leaked the change
9292 // to the caller, breaking p10k/zinit's per-function emulate
9293 // -L sticky-mode pattern.
9294 let saved_options = crate::ported::options::opt_state_snapshot();
9295 let (
9296 saved_params,
9297 saved_zero,
9298 saved_scriptname,
9299 saved_funcstack,
9300 saved_exit_trap,
9301 ) = with_executor(|exec| {
9302 let prev = exec.pparams();
9303 exec.set_pparams(args.clone());
9304 exec.local_scope_depth += 1;
9305 // c:Src/exec.c doshfunc startparamscope() — bump
9306 // canonical locallevel before the function body runs
9307 // so any inner `local`/`typeset` writes Params at the
9308 // right scope. endparamscope at exit restores.
9309 crate::ported::params::locallevel
9310 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
9311 // Save and clear EXIT trap before function body
9312 // runs. Direct port of zsh's exec.c
9313 // `dotrapargs(SIGEXIT, ...)` deferred-fire pattern
9314 // — an EXIT trap set INSIDE a function fires on
9315 // function return (NOT shell exit), and the outer
9316 // EXIT trap is preserved across the call. Without
9317 // this save/restore, `foo() { trap "echo X" EXIT; }`
9318 // either fired X at SHELL exit (if no outer trap)
9319 // or polluted the parent's EXIT trap.
9320 let saved = exec.traps.remove("EXIT");
9321 // zsh's `$0` inside a function returns the function name
9322 // (under the FUNCTION_ARGZERO option, default on). Save
9323 // the previous `$0` and install the function name.
9324 // Anonymous functions get the cosmetic name `(anon)` —
9325 // zshrs's parser synthesizes `_zshrs_anon_N` /
9326 // `_zshrs_anon_kw_N` for `() { … }` and `function { … }`
9327 // so users would see the internal name otherwise.
9328 let display_name = if fn_name.starts_with("_zshrs_anon_") {
9329 "(anon)".to_string()
9330 } else {
9331 fn_name.clone()
9332 };
9333 let prev_zero = crate::ported::params::getsparam("0");
9334 exec.set_scalar("0".to_string(), display_name.clone());
9335 // scriptname: PS4's `%N` and error-message prefix both
9336 // read `exec.scriptname`. Inside a function, C zsh sets
9337 // `scriptname = dupstring(name)` at Src/exec.c:5903 so
9338 // `%N` shows the function name. Save the outer
9339 // scriptname before overwrite; restored on return.
9340 let prev_scriptname = std::mem::replace(
9341 &mut exec.scriptname,
9342 Some(display_name.clone()),
9343 );
9344 // funcstack: prepend the function name; outermost call
9345 // is at the END of the stack per zsh.
9346 let prev_stack = exec.array("funcstack");
9347 let mut new_stack = vec![fn_name.clone()];
9348 if let Some(ref s) = prev_stack {
9349 new_stack.extend_from_slice(s);
9350 }
9351 exec.set_array("funcstack".to_string(), new_stack);
9352 let line_base = exec
9353 .function_line_base
9354 .get(&fn_name)
9355 .copied()
9356 .unwrap_or(0);
9357 let def_file = exec
9358 .function_def_file
9359 .get(&fn_name)
9360 .cloned()
9361 .flatten();
9362 exec.prompt_funcstack
9363 .push((fn_name.clone(), line_base, def_file));
9364 // Set `$_` BEFORE the function body runs. zsh: inside
9365 // a function, `echo $_` reads the function name (when
9366 // called with no args) or the last call-arg.
9367 // Without this, internal builtins that ran before
9368 // (like REGISTER_COMPILED_FN) leaked their last arg
9369 // (the function body source!) as $_.
9370 let dollar_underscore = args.last().cloned().unwrap_or_else(|| fn_name.clone());
9371 exec.set_scalar("_".to_string(), dollar_underscore.clone());
9372 exec.pending_underscore = Some(dollar_underscore);
9373 (
9374 prev,
9375 prev_zero,
9376 prev_scriptname,
9377 prev_stack,
9378 saved,
9379 )
9380 });
9381
9382 let mut vm = fusevm::VM::new(chunk);
9383 register_builtins(&mut vm);
9384 // Seed the function-body VM with the parent's `$?` so a
9385 // function that reads `$?` BEFORE running any command sees
9386 // the caller's last status. Direct port of zsh's exec.c
9387 // `execfuncdef`/`doshfunc` semantics — function entry does
9388 // NOT reset `$?`. Without this, `false; foo() { echo $?; }; foo`
9389 // printed 0 instead of 1 because the fresh VM defaulted
9390 // last_status to 0.
9391 vm.last_status = with_executor(|exec| exec.last_status());
9392 let _ = vm.run();
9393 let status = vm.last_status;
9394
9395 // Fire any EXIT trap set INSIDE the function body, then
9396 // restore the outer EXIT trap. zsh fires the function-
9397 // scope EXIT trap BEFORE control returns to the caller,
9398 // so `foo() { trap "echo X" EXIT; }; foo; echo done`
9399 // outputs `X` then `done`. Without this, X never fired
9400 // (or fired at shell exit, polluting unrelated commands).
9401 let inner_exit = with_executor(|exec| exec.traps.remove("EXIT"));
9402 if let Some(action) = inner_exit {
9403 // Run the trap in the current (still-inside-function)
9404 // scope so it sees `$0 == fn_name` etc. Errors are
9405 // swallowed — zsh's trap dispatch tolerates body
9406 // failures.
9407 let _ = with_executor(|exec| {
9408 exec.set_last_status(status);
9409 exec.execute_script_zsh_pipeline(&action)
9410 });
9411 }
9412 // Restore outer EXIT trap (if any).
9413 if let Some(outer) = saved_exit_trap {
9414 with_executor(|exec| {
9415 exec.traps.insert("EXIT".to_string(), outer);
9416 });
9417 }
9418
9419 with_executor(|exec| {
9420 // Set `$_` to the last arg the function was called with
9421 // (or the function name when called with no args). zsh:
9422 // `$_` after `foo arg` is `arg`; after `foo` (no args) is
9423 // `foo`. The function-internal `pop_args` calls polluted
9424 // pending_underscore with internal command args; clear and
9425 // overwrite here so the caller sees the function's call
9426 // form, not internal `return 42` arg.
9427 let last_call_arg = args.last().cloned().unwrap_or_else(|| fn_name.clone());
9428 exec.set_scalar("_".to_string(), last_call_arg.clone());
9429 exec.pending_underscore = Some(last_call_arg);
9430 exec.set_pparams(saved_params);
9431 exec.local_scope_depth -= 1;
9432 // LOCAL_OPTIONS: when set at function exit, restore all
9433 // options to the snapshot taken at entry. `emulate -L`
9434 // arms this; plugin code uses both forms to scope option
9435 // changes inside helpers without leaking to callers.
9436 // Without it, `setopt no_glob` inside a helper polluted
9437 // the caller's option state.
9438 if crate::ported::options::opt_state_get("localoptions").unwrap_or(false) {
9439 // Walk all options touched since entry; reset to snapshot.
9440 let current = crate::ported::options::opt_state_snapshot();
9441 for (k, _) in ¤t {
9442 if !saved_options.contains_key(k) {
9443 crate::ported::options::opt_state_unset(k);
9444 }
9445 }
9446 for (k, v) in &saved_options {
9447 crate::ported::options::opt_state_set(k, *v);
9448 }
9449 }
9450 let _ = exec; // exec still used below for other restores
9451 // Restore `$0`, scriptname, and `$funcstack` to their
9452 // pre-call values. scriptname mirrors C exec.c:5907
9453 // `scriptname = oldscriptname;` after execode returns.
9454 match saved_zero {
9455 Some(v) => {
9456 exec.set_scalar("0".to_string(), v);
9457 }
9458 None => {
9459 exec.unset_scalar("0");
9460 }
9461 }
9462 exec.scriptname = saved_scriptname;
9463 exec.prompt_funcstack.pop();
9464 match saved_funcstack {
9465 Some(s) => {
9466 exec.set_array("funcstack".to_string(), s);
9467 }
9468 None => {
9469 exec.unset_array("funcstack");
9470 }
9471 }
9472 // c:Src/exec.c doshfunc → endparamscope(). Walks paramtab
9473 // restoring Param.old chain for every local declaration
9474 // made during the call.
9475 crate::ported::params::endparamscope();
9476 });
9477
9478 Some(status)
9479 }
9480}
9481
9482// ───────────────────────────────────────────────────────────────────────────
9483// Host-routed shell ops: ShellExecutor methods invoked by ZshrsHost from the
9484// fusevm VM. Not a port of Src/exec.c (see file-level docs above) — they're
9485// the bridge between fusevm opcodes and ShellExecutor state.
9486// ───────────────────────────────────────────────────────────────────────────
9487impl crate::ported::exec::ShellExecutor {
9488 // ─── Host-routed shell ops (called by ZshrsHost from fusevm) ────────────
9489
9490 /// Apply a single redirection. The current scope's saved-fd vec gets a
9491 /// dup of the original fd so it can be restored by `host_redirect_scope_end`.
9492 /// `op_byte` matches `fusevm::op::redirect_op::*`.
9493 /// Apply a file-open result to a redirect fd; on error, emit
9494 /// zsh-format diagnostic, set redirect_failed, sink fd to /dev/null.
9495 /// Shared between WRITE/APPEND/READ/CLOBBER arms in
9496 /// host_apply_redirect to keep the error-handling identical.
9497 fn redir_open_or_fail(
9498 fd: i32,
9499 result: std::io::Result<std::fs::File>,
9500 target: &str,
9501 redirect_failed: &mut bool,
9502 ) -> bool {
9503 match result {
9504 Ok(file) => {
9505 let new_fd = file.into_raw_fd();
9506 unsafe {
9507 libc::dup2(new_fd, fd);
9508 libc::close(new_fd);
9509 }
9510 true
9511 }
9512 Err(e) => {
9513 let msg = match e.kind() {
9514 std::io::ErrorKind::PermissionDenied => "permission denied",
9515 std::io::ErrorKind::NotFound => "no such file or directory",
9516 std::io::ErrorKind::IsADirectory => "is a directory",
9517 _ => "redirect failed",
9518 };
9519 eprintln!("zshrs:1: {}: {}", msg, target);
9520 *redirect_failed = true;
9521 if let Ok(devnull) = std::fs::OpenOptions::new()
9522 .read(true)
9523 .write(true)
9524 .open("/dev/null")
9525 {
9526 let new_fd = devnull.into_raw_fd();
9527 unsafe {
9528 libc::dup2(new_fd, fd);
9529 libc::close(new_fd);
9530 }
9531 }
9532 false
9533 }
9534 }
9535 }
9536
9537 pub fn host_apply_redirect(&mut self, fd: u8, op_byte: u8, target: &str) {
9538 // `&>` / `&>>` always target both fd 1 and fd 2 regardless of the
9539 // fd byte the parser supplied (the lexer's tokfd clamp makes the
9540 // raw value unreliable for these forms).
9541 let fd: i32 = if matches!(op_byte, r::WRITE_BOTH | r::APPEND_BOTH) {
9542 1
9543 } else {
9544 fd as i32
9545 };
9546 let saved = unsafe { libc::dup(fd) };
9547 if saved >= 0 {
9548 if let Some(top) = self.redirect_scope_stack.last_mut() {
9549 top.push((fd, saved));
9550 } else {
9551 // No scope — leave saved fd open and let the next scope
9552 // reclaim it. (Caller without a scope leaks the dup; this
9553 // matches `WithRedirects` parser construction always wrapping.)
9554 unsafe { libc::close(saved) };
9555 }
9556 }
9557 // For `&>` / `&>>` also save fd 2 so the scope restores it after
9558 // the body. Otherwise stderr stays redirected past the command.
9559 if matches!(op_byte, r::WRITE_BOTH | r::APPEND_BOTH) {
9560 let saved2 = unsafe { libc::dup(2) };
9561 if saved2 >= 0 {
9562 if let Some(top) = self.redirect_scope_stack.last_mut() {
9563 top.push((2, saved2));
9564 } else {
9565 unsafe { libc::close(saved2) };
9566 }
9567 }
9568 }
9569 match op_byte {
9570 r::WRITE => {
9571 // Honor `setopt noclobber`: refuse to overwrite an
9572 // existing regular file unless `>!` / `>|` (CLOBBER).
9573 // zsh internally stores the inverted-name `clobber`
9574 // (default ON); `setopt noclobber` writes
9575 // `clobber=false`. Honor both keys.
9576 let noclobber = crate::ported::options::opt_state_get("noclobber").unwrap_or(false)
9577 || !crate::ported::options::opt_state_get("clobber").unwrap_or(true);
9578 if noclobber && std::path::Path::new(target).exists() {
9579 eprintln!("zshrs:1: file exists: {}", target);
9580 self.set_last_status(1);
9581 // Sink the upcoming command's stdout to /dev/null
9582 // so we don't leak its output to the terminal.
9583 // zsh skips the command entirely; we approximate by
9584 // discarding the output (the redirect target was
9585 // the user's chosen sink, but with noclobber the
9586 // file is protected — discarding matches the
9587 // user's intent better than printing to terminal).
9588 if let Ok(file) = std::fs::OpenOptions::new().write(true).open("/dev/null") {
9589 let new_fd = file.into_raw_fd();
9590 unsafe {
9591 libc::dup2(new_fd, fd);
9592 libc::close(new_fd);
9593 }
9594 }
9595 return;
9596 }
9597 if !Self::redir_open_or_fail(
9598 fd,
9599 std::fs::File::create(target),
9600 target,
9601 &mut self.redirect_failed,
9602 ) {
9603 self.set_last_status(1);
9604 }
9605 }
9606 r::CLOBBER => {
9607 if !Self::redir_open_or_fail(
9608 fd,
9609 std::fs::File::create(target),
9610 target,
9611 &mut self.redirect_failed,
9612 ) {
9613 self.set_last_status(1);
9614 }
9615 }
9616 r::APPEND => {
9617 if !Self::redir_open_or_fail(
9618 fd,
9619 std::fs::OpenOptions::new()
9620 .create(true)
9621 .append(true)
9622 .open(target),
9623 target,
9624 &mut self.redirect_failed,
9625 ) {
9626 self.set_last_status(1);
9627 }
9628 }
9629 r::READ => {
9630 if !Self::redir_open_or_fail(
9631 fd,
9632 std::fs::File::open(target),
9633 target,
9634 &mut self.redirect_failed,
9635 ) {
9636 self.set_last_status(1);
9637 }
9638 }
9639 r::READ_WRITE => {
9640 if let Ok(file) = std::fs::OpenOptions::new()
9641 .create(true)
9642 .truncate(false) // <> opens existing-or-new without truncating
9643 .read(true)
9644 .write(true)
9645 .open(target)
9646 {
9647 let new_fd = file.into_raw_fd();
9648 unsafe {
9649 libc::dup2(new_fd, fd);
9650 libc::close(new_fd);
9651 }
9652 }
9653 }
9654 r::DUP_READ | r::DUP_WRITE => {
9655 // Target is a numeric fd reference like `&3`. The parser
9656 // strips the `&` prefix before we get here in some paths,
9657 // others retain it — accept both. Also support `-` for
9658 // close-fd (`<&-` / `>&-`) per POSIX.
9659 let n = target.trim_start_matches('&');
9660 if n == "-" {
9661 unsafe { libc::close(fd) };
9662 } else if let Ok(src_fd) = n.parse::<i32>() {
9663 unsafe { libc::dup2(src_fd, fd) };
9664 } else {
9665 tracing::warn!(target = %target, "DUP redir: target not parseable as fd");
9666 }
9667 }
9668 r::WRITE_BOTH => {
9669 if let Ok(file) = std::fs::File::create(target) {
9670 let new_fd = file.into_raw_fd();
9671 unsafe {
9672 libc::dup2(new_fd, 1);
9673 libc::dup2(new_fd, 2);
9674 libc::close(new_fd);
9675 }
9676 }
9677 }
9678 r::APPEND_BOTH => {
9679 if let Ok(file) = std::fs::OpenOptions::new()
9680 .create(true)
9681 .append(true)
9682 .open(target)
9683 {
9684 let new_fd = file.into_raw_fd();
9685 unsafe {
9686 libc::dup2(new_fd, 1);
9687 libc::dup2(new_fd, 2);
9688 libc::close(new_fd);
9689 }
9690 }
9691 }
9692 _ => {}
9693 }
9694 }
9695
9696 /// Push a fresh redirect scope. `_count` is informational — the actual
9697 /// saved fds are appended by host_apply_redirect into the top scope.
9698 pub fn host_redirect_scope_begin(&mut self, _count: u8) {
9699 self.redirect_scope_stack.push(Vec::new());
9700 }
9701
9702 /// Pop the top redirect scope, restoring saved fds.
9703 pub fn host_redirect_scope_end(&mut self) {
9704 if let Some(saved) = self.redirect_scope_stack.pop() {
9705 for (fd, saved_fd) in saved.into_iter().rev() {
9706 unsafe {
9707 libc::dup2(saved_fd, fd);
9708 libc::close(saved_fd);
9709 }
9710 }
9711 }
9712 }
9713
9714 /// Set up `content` as stdin (fd 0) for the next command via a real pipe.
9715 /// Used by `Op::HereDoc(idx)` and `Op::HereString`.
9716 ///
9717 /// The pattern: dup2 the read end of a fresh pipe onto fd 0, save the
9718 /// original fd 0 into the active redirect scope so `WithRedirectsEnd`
9719 /// restores it, and spawn a thread that writes `content` to the write end
9720 /// and closes it (so the consumer sees EOF after the body). A thread is
9721 /// needed because writing could block on a finite pipe buffer.
9722 pub fn host_set_pending_stdin(&mut self, content: String) {
9723 let (read_end, write_end) = match os_pipe::pipe() {
9724 Ok(p) => p,
9725 Err(_) => return,
9726 };
9727 let saved = unsafe { libc::dup(libc::STDIN_FILENO) };
9728 if saved >= 0 {
9729 if let Some(top) = self.redirect_scope_stack.last_mut() {
9730 top.push((libc::STDIN_FILENO, saved));
9731 } else {
9732 unsafe { libc::close(saved) };
9733 }
9734 }
9735 let read_fd = std::os::unix::io::AsRawFd::as_raw_fd(&read_end);
9736 unsafe { libc::dup2(read_fd, libc::STDIN_FILENO) };
9737 drop(read_end);
9738 std::thread::spawn(move || {
9739 let mut w = write_end;
9740 let _ = w.write_all(content.as_bytes());
9741 });
9742 }
9743
9744 /// Spawn an external command using zshrs's full dispatch logic
9745 /// (intercepts, command_hash, redirect handling). Used by
9746 /// `ZshrsHost::exec` so the bytecode VM's `Op::Exec` and
9747 /// `Op::CallFunction` external fallback get the same semantics as
9748 /// the tree-walker's `execute_external` rather than a plain
9749 /// `Command::new` shortcut. Returns the exit status.
9750 pub fn host_exec_external(&mut self, args: &[String]) -> i32 {
9751 // If a glob expansion in this command's argv triggered the
9752 // nomatch error path, suppress the actual exec and return
9753 // status 1 — mirrors zsh's command-aborted-on-glob-error
9754 // behaviour. The flag is reset BEFORE returning so the next
9755 // command starts clean.
9756 if self.current_command_glob_failed.get() {
9757 self.current_command_glob_failed.set(false);
9758 self.set_last_status(1);
9759 return 1;
9760 }
9761 let Some((cmd, rest)) = args.split_first() else {
9762 return 0;
9763 };
9764 // Empty command name (e.g. result of an empty `$(false)`
9765 // command-sub being the only word) — zsh: no command runs,
9766 // exit status preserved from prior step. Was hitting the
9767 // "command not found: " path with empty name.
9768 if cmd.is_empty() && rest.is_empty() {
9769 return self.last_status();
9770 }
9771 let rest_vec: Vec<String> = rest.to_vec();
9772 // Update `$_` with the just-arriving argv so the next command
9773 // reads `_=<last_arg>`. Mirrors C zsh's writeback in
9774 // `execcmd_exec` (Src/exec.c). Per `args.last()` semantics,
9775 // when invoked as `cmd a b c`, `$_` becomes "c" — for a bare
9776 // command with no args, `$_` becomes the command name itself.
9777 crate::ported::params::set_zunderscore(args);
9778
9779 // Builtins not in fusevm's name→id table fall through to
9780 // host.exec. Catch them here before the OS-level exec attempts
9781 // to spawn a non-existent binary.
9782 match cmd.as_str() {
9783 "sched" => return self.bin_sched(&rest_vec),
9784 "echotc" => return crate::fusevm_bridge::dispatch_builtin("echotc", rest_vec.clone()),
9785 "echoti" => return crate::fusevm_bridge::dispatch_builtin("echoti", rest_vec.clone()),
9786 // "getln" handler deleted with its stub.
9787 "zpty" => return crate::fusevm_bridge::dispatch_builtin("zpty", rest_vec.clone()),
9788 "ztcp" => return crate::fusevm_bridge::dispatch_builtin("ztcp", rest_vec.clone()),
9789 "zsocket" => {
9790 // Shim — parses the BUILTIN spec "ad:ltv" from
9791 // socket.c:276 into a real `options` struct, then
9792 // invokes the canonical free-fn port at
9793 // crate::ported::modules::socket::bin_zsocket whose
9794 // signature matches C `bin_zsocket(nam, args, ops,
9795 // func)` exactly.
9796 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9797 argscount: 0, argsalloc: 0 };
9798 let mut positional: Vec<String> = Vec::new();
9799 let mut i = 0;
9800 while i < rest_vec.len() {
9801 let a = &rest_vec[i];
9802 if a == "--" {
9803 i += 1;
9804 positional.extend_from_slice(&rest_vec[i..]);
9805 break;
9806 }
9807 if let Some(rest) = a.strip_prefix('-') {
9808 if rest.is_empty() { positional.push(a.clone()); i += 1; continue; }
9809 let chars: Vec<char> = rest.chars().collect();
9810 let mut j = 0;
9811 while j < chars.len() {
9812 let c = chars[j] as u8;
9813 if c == b'd' {
9814 ops.ind[c as usize] = (ops.args.len() + 1) as u8;
9815 let rest_after = &rest[j + 1..];
9816 if !rest_after.is_empty() {
9817 ops.args.push(rest_after.to_string());
9818 } else {
9819 i += 1;
9820 ops.args.push(rest_vec.get(i).cloned().unwrap_or_default());
9821 }
9822 ops.argscount = ops.args.len() as i32;
9823 break;
9824 }
9825 if c.is_ascii_alphabetic() { ops.ind[c as usize] = 1; }
9826 j += 1;
9827 }
9828 } else {
9829 positional.push(a.clone());
9830 }
9831 i += 1;
9832 }
9833 return bin_zsocket("zsocket", &positional, &ops, 0);
9834 }
9835 "private" => {
9836 // bin_private now takes the canonical C signature
9837 // (name, args, ops, func, assigns) per Src/Modules/
9838 // param_private.c:217.
9839 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9840 argscount: 0, argsalloc: 0 };
9841 let mut assigns: Vec<(String, String)> = Vec::new();
9842 return crate::modules::param_private::bin_private("private",
9843 &rest_vec, &mut ops, 0, &mut assigns);
9844 }
9845 "zformat" => return crate::fusevm_bridge::dispatch_builtin("zformat", rest_vec.clone()),
9846 "zregexparse" => return crate::fusevm_bridge::dispatch_builtin("zregexparse", rest_vec.clone()),
9847 // `unalias`/`unhash`/`unfunction` share `bin_unhash` but
9848 // each carries its own funcid (BIN_UNALIAS / BIN_UNHASH /
9849 // BIN_UNFUNCTION) in the BUILTINS table. Route through
9850 // execbuiltin so the correct funcid + optstr propagate —
9851 // without this `unalias` was a silent no-op.
9852 "unalias" | "unhash" | "unfunction" => {
9853 // Fallback when fusevm doesn't have a BUILTIN_*
9854 // opcode registered for the name (e.g. shell-builtin
9855 // table mismatch). Route through execbuiltin with the
9856 // correct entry from BUILTINS.
9857 let bn_idx = crate::ported::builtin::BUILTINS.iter()
9858 .position(|b| b.node.nam == cmd.as_str());
9859 if let Some(idx) = bn_idx {
9860 let bn_static: &'static crate::ported::zsh_h::builtin =
9861 &crate::ported::builtin::BUILTINS[idx];
9862 let bn_ptr = bn_static as *const _ as *mut _;
9863 return crate::ported::builtin::execbuiltin(
9864 rest_vec, Vec::new(), bn_ptr);
9865 }
9866 return 1;
9867 }
9868 // zsh-bundled rename helpers — implemented natively in
9869 // Rust so `autoload -U zmv` works without shipping the
9870 // function source. (Without this, the autoload path hangs.)
9871 "zmv" => return crate::extensions::ext_builtins::zmv(&rest_vec, "mv"),
9872 "zcp" => return crate::extensions::ext_builtins::zmv(&rest_vec, "cp"),
9873 "zln" => return crate::extensions::ext_builtins::zmv(&rest_vec, "ln"),
9874 "zcalc" => return crate::extensions::ext_builtins::zcalc(&rest_vec),
9875 "zselect" => {
9876 // Canonical bin_zselect per zselect.c:65 takes
9877 // (nam, args, ops, func); the C source parses its
9878 // own option string inline, so an empty Options is
9879 // sufficient at this call site.
9880 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9881 argscount: 0, argsalloc: 0 };
9882 return crate::ported::modules::zselect::bin_zselect(
9883 "zselect", &rest_vec, &ops, 0);
9884 }
9885 "cap" => return crate::fusevm_bridge::dispatch_builtin("cap", rest_vec.clone()),
9886 "getcap" => return crate::fusevm_bridge::dispatch_builtin("getcap", rest_vec.clone()),
9887 "setcap" => return crate::fusevm_bridge::dispatch_builtin("setcap", rest_vec.clone()),
9888 "yes" => return self.builtin_yes(&rest_vec),
9889 "nl" => return self.builtin_nl(&rest_vec),
9890 "env" => return self.builtin_env(&rest_vec),
9891 "printenv" => return self.builtin_printenv(&rest_vec),
9892 "tty" => return self.builtin_tty(&rest_vec),
9893 "chgrp" => {
9894 // Canonical bin_chown per files.c:725 with func=BIN_CHGRP
9895 // per the bintab entry at c:805. BUILTIN spec "hRs".
9896 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9897 argscount: 0, argsalloc: 0 };
9898 let mut positional: Vec<String> = Vec::new();
9899 let mut i = 0;
9900 while i < rest_vec.len() {
9901 let a = &rest_vec[i];
9902 if a == "--" { i += 1; positional.extend_from_slice(&rest_vec[i..]); break; }
9903 if let Some(rest) = a.strip_prefix('-') {
9904 if rest.is_empty() { positional.push(a.clone()); i += 1; continue; }
9905 for c in rest.chars() {
9906 let cb = c as u8;
9907 if cb.is_ascii_alphabetic() { ops.ind[cb as usize] = 1; }
9908 }
9909 } else {
9910 positional.push(a.clone());
9911 }
9912 i += 1;
9913 }
9914 return crate::ported::modules::files::bin_chown(
9915 "chgrp", &positional, &ops,
9916 crate::ported::modules::files::BIN_CHGRP);
9917 }
9918 "nproc" => return self.builtin_nproc(&rest_vec),
9919 "expr" => return self.builtin_expr(&rest_vec),
9920 "sha256sum" => return self.builtin_sha256sum(&rest_vec),
9921 "base64" => return self.builtin_base64(&rest_vec),
9922 "tac" => return self.builtin_tac(&rest_vec),
9923 "expand" => return self.builtin_expand(&rest_vec),
9924 "unexpand" => return self.builtin_unexpand(&rest_vec),
9925 "paste" => return self.builtin_paste(&rest_vec),
9926 "fold" => return self.builtin_fold(&rest_vec),
9927 "shuf" => return self.builtin_shuf(&rest_vec),
9928 "comm" => return self.builtin_comm(&rest_vec),
9929 "cksum" => return self.builtin_cksum(&rest_vec),
9930 "factor" => return self.builtin_factor(&rest_vec),
9931 "tsort" => return self.builtin_tsort(&rest_vec),
9932 "sum" => return self.builtin_sum(&rest_vec),
9933 "mkfifo" => return self.builtin_mkfifo(&rest_vec),
9934 "link" => return self.builtin_link(&rest_vec),
9935 "unlink" => return self.builtin_unlink(&rest_vec),
9936 "dircolors" => return self.builtin_dircolors(&rest_vec),
9937 "groups" => return self.builtin_groups(&rest_vec),
9938 "arch" => return self.builtin_arch(&rest_vec),
9939 "nice" => return self.builtin_nice(&rest_vec),
9940 "logname" => return self.builtin_logname(&rest_vec),
9941 "tput" => return self.builtin_tput(&rest_vec),
9942 "users" => return self.builtin_users(&rest_vec),
9943 // "sync" => return self.bin_sync(&rest_vec),
9944 "zbuild" => return self.builtin_zbuild(&rest_vec),
9945 // `zf_*` aliases from `zsh/files` (Src/Modules/files.c
9946 // BUILTIN table at line 816-824). The C source binds
9947 // both unprefixed (`chmod`) and prefixed (`zf_chmod`)
9948 // names to the SAME `bin_chmod` etc. handlers — the
9949 // prefixed forms exist so a script can portably reach
9950 // the builtin even when a function or alias has shadowed
9951 // the bare name. Each arm routes through the canonical
9952 // free-fn port of Src/Modules/files.c, parsing the BUILTIN
9953 // optstr inline since the framework doesn't pre-parse.
9954 "zf_mkdir" | "mkdir" => {
9955 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9956 argscount: 0, argsalloc: 0 };
9957 let mut positional: Vec<String> = Vec::new();
9958 let mut i = 0;
9959 while i < rest_vec.len() {
9960 let a = &rest_vec[i];
9961 if a == "--" {
9962 i += 1;
9963 positional.extend_from_slice(&rest_vec[i..]);
9964 break;
9965 }
9966 if let Some(rest) = a.strip_prefix('-') {
9967 if rest.is_empty() { positional.push(a.clone()); i += 1; continue; }
9968 let chars: Vec<char> = rest.chars().collect();
9969 let mut j = 0;
9970 while j < chars.len() {
9971 let c = chars[j] as u8;
9972 if c == b'm' {
9973 ops.ind[c as usize] = (ops.args.len() + 1) as u8;
9974 let rest_after = &rest[j + 1..];
9975 if !rest_after.is_empty() {
9976 ops.args.push(rest_after.to_string());
9977 } else {
9978 i += 1;
9979 ops.args.push(rest_vec.get(i).cloned().unwrap_or_default());
9980 }
9981 ops.argscount = ops.args.len() as i32;
9982 break;
9983 }
9984 if c.is_ascii_alphabetic() { ops.ind[c as usize] = 1; }
9985 j += 1;
9986 }
9987 } else {
9988 positional.push(a.clone());
9989 }
9990 i += 1;
9991 }
9992 return crate::ported::modules::files::bin_mkdir(
9993 cmd, &positional, &ops, 0);
9994 }
9995 "zf_rm" => {
9996 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9997 argscount: 0, argsalloc: 0 };
9998 let mut positional: Vec<String> = Vec::new();
9999 let mut i = 0;
10000 while i < rest_vec.len() {
10001 let a = &rest_vec[i];
10002 if a == "--" {
10003 i += 1;
10004 positional.extend_from_slice(&rest_vec[i..]);
10005 break;
10006 }
10007 if let Some(rest) = a.strip_prefix('-') {
10008 if rest.is_empty() { positional.push(a.clone()); i += 1; continue; }
10009 for c in rest.chars() {
10010 let cb = c as u8;
10011 if cb.is_ascii_alphabetic() { ops.ind[cb as usize] = 1; }
10012 }
10013 } else {
10014 positional.push(a.clone());
10015 }
10016 i += 1;
10017 }
10018 return crate::ported::modules::files::bin_rm(
10019 "zf_rm", &positional, &ops, 0);
10020 }
10021 "zf_rmdir" => {
10022 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
10023 argscount: 0, argsalloc: 0 };
10024 return crate::ported::modules::files::bin_rmdir(
10025 "zf_rmdir", &rest_vec, &ops, 0);
10026 }
10027 // `zstat` — port of zsh/stat module (Src/Modules/stat.c
10028 // BUILTIN("zstat", …)). Returns file metadata as
10029 // `field value` pairs / an assoc / a plus-separated
10030 // list depending on flags. zsh ALSO registers `stat`
10031 // bound to the same handler, but that name conflicts
10032 // with the system `stat(1)` binary (every script that
10033 // calls `stat -f '%Lp' …` would break). zsh resolves
10034 // this through opt-in `zmodload`; zshrs's modules are
10035 // statically linked so we keep `stat` routing to the
10036 // external command and only intercept the unambiguous
10037 // `zstat` name.
10038 "zstat" => {
10039 // bin_stat now takes the canonical C signature
10040 // (name, args, ops, func) per Src/Modules/stat.c:368.
10041 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
10042 argscount: 0, argsalloc: 0 };
10043 return crate::modules::stat::bin_stat("zstat", &rest_vec, &ops, 0);
10044 }
10045 _ => {}
10046 }
10047
10048 // AOP intercepts: when an `intercept :before/:around/:after foo` block
10049 // is registered, dynamic-command-name dispatch must consult it before
10050 // spawning. Without this, `cmd=ls; $cmd` bypasses every intercept that
10051 // a literal `ls` would trigger. The full_cmd string mirrors what the
10052 // tree-walker era passed (cmd + args joined by space) so existing
10053 // pattern matchers continue to work.
10054 if !self.intercepts.is_empty() {
10055 let full_cmd = if rest_vec.is_empty() {
10056 cmd.clone()
10057 } else {
10058 format!("{} {}", cmd, rest_vec.join(" "))
10059 };
10060 if let Some(intercept_result) = self.run_intercepts(cmd, &full_cmd, &rest_vec) {
10061 return intercept_result.unwrap_or(127);
10062 }
10063 }
10064
10065 // User-defined function lookup before OS-level exec. zsh's
10066 // dynamic-command-name dispatch (`cmd=hook1; $cmd`) checks
10067 // the function table FIRST — without this, `$f` for a
10068 // function-name `f` was always falling through to
10069 // `execute_external` and erroring "command not found".
10070 // Plugin code uses this pattern constantly:
10071 // for f in "${precmd_functions[@]}"; do "$f"; done
10072 if self.function_exists(cmd) {
10073 if let Some(status) = self.dispatch_function_call(cmd, &rest_vec) {
10074 return status;
10075 }
10076 }
10077
10078 self.execute_external(cmd, &rest_vec, &[]).unwrap_or(127)
10079 }
10080}