zsh/fusevm_bridge.rs
1//! fusevm bytecode-VM bridge for ShellExecutor.
2//!
3//! **Extension** — has no Src/exec.c counterpart. C zsh's `Src/exec.c::execlist`
4//! (and related routines) implement the native **wordcode VM** that executes
5//! compiler output from `parse.c`. zshrs compiles the parsed AST to fusevm
6//! bytecode and runs it on a stack VM; this
7//! file holds the bridge between fusevm's `ShellHost` trait and our
8//! `ShellExecutor` state, the thread-local executor pointer, all
9//! `BUILTIN_*` opcode constants, and the giant `register_builtins`
10//! handler table that wires zsh builtins onto fusevm CallBuiltin
11//! opcodes.
12
13#![allow(unused_imports)]
14
15use crate::history::HistoryEngine;
16// MathState is private to math.rs (no public state struct in math.c).
17use crate::options::ZSH_OPTIONS_SET;
18// TcpSessions struct deleted — modules/tcp.rs uses ZTCP_SESSIONS thread_local.
19use crate::zftp::zftp_globals;
20// `Profiler` deleted — zprof state is module-level statics now.
21use crate::zutil::style_table;
22use compsys::cache::CompsysCache;
23use compsys::CompInitResult;
24use parking_lot::Mutex;
25use std::collections::{HashMap, HashSet};
26use std::env;
27use std::path::PathBuf;
28use std::sync::LazyLock;
29use indexmap::IndexMap;
30
31use crate::ported::exec::*;
32use crate::exec_jobs::JobState;
33use crate::intercepts::{AdviceKind, Intercept, intercept_matches};
34use std::io::Write;
35
36// ═══════════════════════════════════════════════════════════════════════════
37// Thread-local executor context for VM builtin dispatch
38// ═══════════════════════════════════════════════════════════════════════════
39
40use std::cell::{Cell, RefCell};
41use crate::socket::bin_zsocket;
42use fusevm::shell_builtins::*;
43use fusevm::Value;
44use crate::ported::zsh_h::{options, MAX_OPS};
45use std::io::BufRead;
46use crate::ported::zle::zle_thingy::getwidgettarget;
47use std::time::{SystemTime, UNIX_EPOCH};
48use std::cmp::Ordering;
49use std::fs;
50use std::os::unix::fs::PermissionsExt;
51use std::time::Instant;
52use std::os::unix::fs::MetadataExt;
53use std::os::unix::fs::FileTypeExt;
54use std::io::Write as _;
55use std::os::unix::io::AsRawFd;
56use std::ffi::CString;
57use std::io::Read;
58use std::os::unix::io::IntoRawFd;
59use fusevm::op::redirect_op as r;
60
thread_local! {
    /// Per-thread "PS4 already printed" flag for the xtrace line that is
    /// currently being assembled.
    ///
    /// Counterpart of the `doneps4` local in C zsh's `execcmd_exec`
    /// (Src/exec.c:2517+). While it is `true`, a run of XTRACE_ASSIGN
    /// followed by XTRACE_ARGS is coalesced into a single trace line
    /// (`<PS4>a=1 b=2 echo 1 2\n`) rather than three separate ones.
    /// XTRACE_ARGS / XTRACE_NEWLINE put it back to `false` once the
    /// trailing `\n` has been written.
    static XTRACE_DONE_PS4: Cell<bool> = const { Cell::new(false) };
}
71
72// Thread-local pointer to the current ShellExecutor.
73// Set before VM execution, cleared after. Used by builtin handlers.
74thread_local! {
75 static CURRENT_EXECUTOR: RefCell<Option<*mut ShellExecutor>> = const { RefCell::new(None) };
76}
77
/// RAII guard that installs and later clears the thread-local executor
/// pointer.
///
/// Entering is idempotent: if a context is already active, `enter` leaves
/// the stored pointer alone, and the resulting guard does nothing on drop.
/// Only the guard whose `enter` call actually stored the pointer clears it
/// again, so nested `execute_command` invocations (for example from inside
/// a builtin handler) keep using the outer pointer instead of stomping it.
///
/// The guard must be bound to a named variable for as long as the VM runs;
/// a discarded guard is dropped immediately and clears the pointer again.
#[must_use = "dropping the guard immediately clears the thread-local executor pointer"]
pub(crate) struct ExecutorContext {
    /// `true` when this guard stored the pointer and therefore owns the
    /// responsibility of clearing it on drop.
    we_set_it: bool,
}
88
89impl ExecutorContext {
90 pub(crate) fn enter(executor: &mut ShellExecutor) -> Self {
91 let we_set_it = CURRENT_EXECUTOR.with(|cell| {
92 let mut slot = cell.borrow_mut();
93 if slot.is_some() {
94 false
95 } else {
96 *slot = Some(executor as *mut ShellExecutor);
97 true
98 }
99 });
100 ExecutorContext { we_set_it }
101 }
102}
103
104impl Drop for ExecutorContext {
105 fn drop(&mut self) {
106 if self.we_set_it {
107 CURRENT_EXECUTOR.with(|cell| {
108 *cell.borrow_mut() = None;
109 });
110 }
111 }
112}
113
114
115/// Access the current executor from a builtin handler.
116/// # Safety
117/// Only call this from within a VM execution context (after ExecutorContext::enter).
118#[inline]
119pub(crate) fn with_executor<F, R>(f: F) -> R
120where
121 F: FnOnce(&mut ShellExecutor) -> R,
122{
123 CURRENT_EXECUTOR.with(|cell| {
124 let ptr = cell
125 .borrow()
126 .expect("with_executor called outside VM context");
127 // SAFETY: The pointer is valid for the duration of VM execution,
128 // and we're single-threaded within the executor.
129 let executor = unsafe { &mut *ptr };
130 f(executor)
131 })
132}
133
134// `try_with_executor` removed. The fallible variant was the bridge
135// canonical-side ports used to mirror writes into the legacy
136// exec.{variables,arrays,assoc_arrays,positional_params,
137// local_save_stack,var_attrs} caches. All such mirrors are now
138// dissolved: canonical setaparam / sethparam / setsparam write
139// paramtab as the single source of truth; fusevm reads consult
140// paramtab via exec.array() / exec.assoc() / exec.scalar() /
141// exec.pparams() / exec.param_flags() helpers.
142//
143// PM_LOCAL scope save lives in BUILTIN_LOCAL dispatcher (with
144// with_executor — the mandatory variant). Eval execute_script lives
145// in BUILTIN_EVAL dispatcher. Lastval reads from canonical LASTVAL
146// atomic that exec.set_last_status keeps current.
147
148
149
150
151/// Look up a canonical builtin by name in `BUILTINS` and dispatch
152/// via `execbuiltin` (Src/builtin.c:250). Mirrors the C pattern
153/// `bn = gethashnode2(builtintab, name); execbuiltin(args, redirs,
154/// bn)`. Returns 1 if no such builtin or if the handler is wired
155/// to None (legacy stub entry — the wrapper on ShellExecutor still
156/// covers those until their handler is wired into BUILTINS).
157pub(crate) fn dispatch_builtin(name: &str, args: Vec<String>) -> i32 {
158 let bn_idx = crate::ported::builtin::BUILTINS.iter()
159 .position(|b| b.node.nam == name);
160 if let Some(idx) = bn_idx {
161 let bn_static: &'static crate::ported::zsh_h::builtin =
162 &crate::ported::builtin::BUILTINS[idx];
163 let bn_ptr = bn_static as *const _ as *mut _;
164 crate::ported::builtin::execbuiltin(args, Vec::new(), bn_ptr)
165 } else {
166 1
167 }
168}
169
170
171/// Register all zsh builtins with the VM.
172pub(crate) fn register_builtins(vm: &mut fusevm::VM) {
173
174 // Macro for builtins that user functions are allowed to shadow.
175 // zsh dispatch order is alias → function → builtin; without the
176 // try_user_fn_override probe a `cat() { ... }; cat` would silently
177 // run the C builtin and ignore the user function.
178 macro_rules! reg_overridable {
179 ($vm:expr, $id:expr, $name:literal, $method:ident) => {
180 $vm.register_builtin($id, |vm, argc| {
181 let args = pop_args(vm, argc);
182 if let Some(s) = try_user_fn_override($name, &args) {
183 return Value::Status(s);
184 }
185 let status = with_executor(|exec| exec.$method(&args));
186 Value::Status(status)
187 });
188 };
189 }
190
191 // Core builtins
192 vm.register_builtin(BUILTIN_CD, |vm, argc| {
193 let args = pop_args(vm, argc);
194 if let Some(s) = try_user_fn_override("cd", &args) {
195 return Value::Status(s);
196 }
197 let status = dispatch_builtin("cd", args);
198 Value::Status(status)
199 });
200
201 vm.register_builtin(BUILTIN_PWD, |vm, argc| {
202 let args = pop_args(vm, argc);
203 if let Some(s) = try_user_fn_override("pwd", &args) {
204 return Value::Status(s);
205 }
206 let status = with_executor(|exec| exec.builtin_pwd_with_args(&args));
207 Value::Status(status)
208 });
209
210 vm.register_builtin(BUILTIN_ECHO, |vm, argc| {
211 let args = pop_args(vm, argc);
212 if let Some(s) = try_user_fn_override("echo", &args) {
213 return Value::Status(s);
214 }
215 // Update `$_` to the last arg before running. C zsh sets
216 // zunderscore in execcmd_exec for every simple command,
217 // including builtins.
218 crate::ported::params::set_zunderscore(&args);
219 let status = with_executor(|exec| exec.builtin_echo(&args, &[]));
220 Value::Status(status)
221 });
222
223 vm.register_builtin(BUILTIN_PRINT, |vm, argc| {
224 let args = pop_args(vm, argc);
225 if let Some(s) = try_user_fn_override("print", &args) {
226 return Value::Status(s);
227 }
228 crate::ported::params::set_zunderscore(&args);
229 let status = dispatch_builtin("print", args);
230 Value::Status(status)
231 });
232
233 vm.register_builtin(BUILTIN_PRINTF, |vm, argc| {
234 let args = pop_args(vm, argc);
235 if let Some(s) = try_user_fn_override("printf", &args) {
236 return Value::Status(s);
237 }
238 let status = with_executor(|exec| exec.builtin_printf(&args));
239 Value::Status(status)
240 });
241
242 vm.register_builtin(BUILTIN_EXPORT, |vm, argc| {
243 let args = pop_args(vm, argc);
244 let status = with_executor(|exec| exec.builtin_export(&args));
245 Value::Status(status)
246 });
247
248 vm.register_builtin(BUILTIN_UNSET, |vm, argc| {
249 let args = pop_args(vm, argc);
250 let status = dispatch_builtin("unset", args);
251 Value::Status(status)
252 });
253
254 vm.register_builtin(BUILTIN_SOURCE, |vm, argc| {
255 let args = pop_args(vm, argc);
256 let status = dispatch_builtin("dot", args);
257 Value::Status(status)
258 });
259
260 vm.register_builtin(BUILTIN_EXIT, |vm, argc| {
261 let args = pop_args(vm, argc);
262 let status = dispatch_builtin("exit", args);
263 Value::Status(status)
264 });
265
266 vm.register_builtin(BUILTIN_RETURN, |vm, argc| {
267 let args = pop_args(vm, argc);
268 // zsh: bare `return` (no arg) returns with the status of
269 // the most recently executed command — `false; return`
270 // returns 1, not 0. Direct port of zsh's bin_break/RETURN.
271 // The executor's `last_status` is stale here (synced at
272 // statement boundaries, not after each VM op), so read
273 // the live `vm.last_status` instead.
274 let live_status = vm.last_status;
275 let status = {
276 // Sync canonical LASTVAL to the VM's view BEFORE
277 // bin_break("return") reads it for the no-arg fallback.
278 with_executor(|exec| exec.set_last_status(live_status));
279 dispatch_builtin("return", args)
280 };
281 Value::Status(status)
282 });
283
284 vm.register_builtin(BUILTIN_TRUE, |vm, argc| {
285 let args = pop_args(vm, argc);
286 if let Some(s) = try_user_fn_override("true", &args) {
287 return Value::Status(s);
288 }
289 // `$_` for no-arg `true` is the command name itself ("true").
290 // pop_args only updates pending_underscore from args; for
291 // bare command name we backfill here.
292 if args.is_empty() {
293 with_executor(|exec| {
294 exec.pending_underscore = Some("true".to_string());
295 });
296 }
297 Value::Status(0)
298 });
299 vm.register_builtin(BUILTIN_FALSE, |vm, argc| {
300 let args = pop_args(vm, argc);
301 if let Some(s) = try_user_fn_override("false", &args) {
302 return Value::Status(s);
303 }
304 if args.is_empty() {
305 with_executor(|exec| {
306 exec.pending_underscore = Some("false".to_string());
307 });
308 }
309 Value::Status(1)
310 });
311 vm.register_builtin(BUILTIN_COLON, |vm, argc| {
312 let args = pop_args(vm, argc);
313 if args.is_empty() {
314 with_executor(|exec| {
315 exec.pending_underscore = Some(":".to_string());
316 });
317 }
318 Value::Status(0)
319 });
320
321 vm.register_builtin(BUILTIN_TEST, |vm, argc| {
322 let args = pop_args(vm, argc);
323 let status = dispatch_builtin("test", args);
324 Value::Status(status)
325 });
326
327 // Variable declaration
328 vm.register_builtin(BUILTIN_LOCAL, |vm, argc| {
329 let args = pop_args(vm, argc);
330 // Canonical bin_local handles the entire scope chain
331 // (`pm->old = oldpm` at Src/params.c:1137 inside createparam,
332 // `pm->level = locallevel` at Src/builtin.c:2576 inside
333 // typeset_single). The dispatcher only routes args.
334 let status = with_executor(|exec| exec.builtin_local(&args));
335 Value::Status(status)
336 });
337
338 vm.register_builtin(BUILTIN_TYPESET, |vm, argc| {
339 let args = pop_args(vm, argc);
340 // fusevm's builtin_id maps both `declare` and `typeset` to
341 // BUILTIN_TYPESET, so this handler must default to the
342 // typeset error-prefix. compile_zsh special-cases `declare`
343 // to register BUILTIN_DECLARE explicitly so that path keeps
344 // the `declare:` prefix in error messages.
345 let status = dispatch_builtin("typeset", args);
346 Value::Status(status)
347 });
348
349 vm.register_builtin(BUILTIN_DECLARE, |vm, argc| {
350 let args = pop_args(vm, argc);
351 let status = with_executor(|exec| exec.builtin_declare(&args));
352 Value::Status(status)
353 });
354
355 vm.register_builtin(BUILTIN_READONLY, |vm, argc| {
356 let args = pop_args(vm, argc);
357 let status = with_executor(|exec| exec.builtin_readonly(&args));
358 Value::Status(status)
359 });
360
361 vm.register_builtin(BUILTIN_INTEGER, |vm, argc| {
362 let args = pop_args(vm, argc);
363 let status = with_executor(|exec| exec.builtin_integer(&args));
364 Value::Status(status)
365 });
366
367 vm.register_builtin(BUILTIN_FLOAT, |vm, argc| {
368 let args = pop_args(vm, argc);
369 let status = with_executor(|exec| exec.builtin_float(&args));
370 Value::Status(status)
371 });
372
373 // I/O
374 vm.register_builtin(BUILTIN_READ, |vm, argc| {
375 let args = pop_args(vm, argc);
376 let status = dispatch_builtin("read", args);
377 Value::Status(status)
378 });
379
380 // Control flow
381 vm.register_builtin(BUILTIN_BREAK, |vm, argc| {
382 let args = pop_args(vm, argc);
383 let status = dispatch_builtin("break", args);
384 Value::Status(status)
385 });
386
387 vm.register_builtin(BUILTIN_CONTINUE, |vm, argc| {
388 let args = pop_args(vm, argc);
389 let status = dispatch_builtin("continue", args);
390 Value::Status(status)
391 });
392
393 vm.register_builtin(BUILTIN_SHIFT, |vm, argc| {
394 let args = pop_args(vm, argc);
395 let status = dispatch_builtin("shift", args);
396 Value::Status(status)
397 });
398
399 vm.register_builtin(BUILTIN_EVAL, |vm, argc| {
400 // Direct port of `bin_eval(UNUSED(char *nam), char **argv, UNUSED(Options ops), UNUSED(int func))` body from Src/builtin.c:6151:
401 // `if (!*argv) return 0;`
402 // `prog = parse_string(zjoin(argv, ' ', 1), 1);`
403 // `execode(prog, 1, 0, "eval");`
404 // The execode invocation lives here (not in the canonical
405 // free-fn) because it must run through the bytecode VM's
406 // current executor — the same VM that's mid-dispatch.
407 let args = pop_args(vm, argc);
408 if args.is_empty() {
409 return Value::Status(0); // c:6160
410 }
411 let src = args.join(" "); // c:6166
412 let status = with_executor(|exec| { // c:6175 execode
413 exec.execute_script(&src).unwrap_or(1)
414 });
415 Value::Status(status)
416 });
417
418 // BUILTIN_EXEC / BUILTIN_COMMAND / BUILTIN_BUILTIN wires deleted
419 // along with their handler stubs in src/exec.rs. The opcodes were
420 // never emitted by the fusevm compiler (zero `Op::CallBuiltin(...)`
421 // references) — leftover from the deleted pre-fusevm `Src/exec.c` port.
422 // When `command` / `exec` / `builtin` land as canonical
423 // ports in `src/ported/builtin.rs` (`Src/builtin.c:4017 bin_command`,
424 // `:6052 bin_exec`, etc.), wire them here through `execbuiltin`.
425
426 vm.register_builtin(BUILTIN_LET, |vm, argc| {
427 let args = pop_args(vm, argc);
428 let status = dispatch_builtin("let", args);
429 Value::Status(status)
430 });
431
432 // Job control
433 vm.register_builtin(BUILTIN_JOBS, |vm, argc| {
434 let args = pop_args(vm, argc);
435 let status = dispatch_builtin("jobs", args);
436 Value::Status(status)
437 });
438
439 vm.register_builtin(BUILTIN_FG, |vm, argc| {
440 let args = pop_args(vm, argc);
441 let status = dispatch_builtin("fg", args);
442 Value::Status(status)
443 });
444
445 vm.register_builtin(BUILTIN_BG, |vm, argc| {
446 let args = pop_args(vm, argc);
447 let status = dispatch_builtin("bg", args);
448 Value::Status(status)
449 });
450
451 vm.register_builtin(BUILTIN_KILL, |vm, argc| {
452 let args = pop_args(vm, argc);
453 let status = dispatch_builtin("kill", args);
454 Value::Status(status)
455 });
456
457 vm.register_builtin(BUILTIN_DISOWN, |vm, argc| {
458 let args = pop_args(vm, argc);
459 let status = dispatch_builtin("disown", args);
460 Value::Status(status)
461 });
462
463 vm.register_builtin(BUILTIN_WAIT, |vm, argc| {
464 let args = pop_args(vm, argc);
465 let status = dispatch_builtin("wait", args);
466 Value::Status(status)
467 });
468
469 vm.register_builtin(BUILTIN_SUSPEND, |vm, argc| {
470 let args = pop_args(vm, argc);
471 let status = dispatch_builtin("suspend", args);
472 Value::Status(status)
473 });
474
475 // History
476 // BUILTIN_HISTORY / BUILTIN_R wires deleted with their stubs.
477 // Opcodes never emitted by the fusevm compiler (dead since the
478 // pre-fusevm executor port was replaced). `bin_fc` stays — it's wired to
479 // the canonical port at `src/ported/builtin.rs`.
480 vm.register_builtin(BUILTIN_FC, |vm, argc| {
481 let args = pop_args(vm, argc);
482 let status = dispatch_builtin("fc", args);
483 Value::Status(status)
484 });
485
486 // Aliases
487 vm.register_builtin(BUILTIN_ALIAS, |vm, argc| {
488 let args = pop_args(vm, argc);
489 let status = dispatch_builtin("alias", args);
490 Value::Status(status)
491 });
492
493 // BUILTIN_UNALIAS wire deleted with its stub.
494
495 // Options
496 vm.register_builtin(BUILTIN_SET, |vm, argc| {
497 let args = pop_args(vm, argc);
498 let status = dispatch_builtin("set", args);
499 Value::Status(status)
500 });
501
502 vm.register_builtin(BUILTIN_SETOPT, |vm, argc| {
503 let args = pop_args(vm, argc);
504 // Canonical bin_setopt per options.c:580 — `isun` discriminant
505 // flips the action polarity; setopt → 0, unsetopt → 1.
506 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
507 argscount: 0, argsalloc: 0 };
508 let status = crate::ported::options::bin_setopt(
509 "setopt", &args, &ops, 0);
510 Value::Status(status)
511 });
512
513 vm.register_builtin(BUILTIN_UNSETOPT, |vm, argc| {
514 let args = pop_args(vm, argc);
515 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
516 argscount: 0, argsalloc: 0 };
517 let status = crate::ported::options::bin_setopt(
518 "unsetopt", &args, &ops, 1);
519 Value::Status(status)
520 });
521
522 vm.register_builtin(BUILTIN_SHOPT, |vm, argc| {
523 let args = pop_args(vm, argc);
524 let status = crate::extensions::ext_builtins::shopt(&args);
525 Value::Status(status)
526 });
527
528 vm.register_builtin(BUILTIN_EMULATE, |vm, argc| {
529 let args = pop_args(vm, argc);
530 let status = dispatch_builtin("emulate", args);
531 Value::Status(status)
532 });
533
534 vm.register_builtin(BUILTIN_GETOPTS, |vm, argc| {
535 let args = pop_args(vm, argc);
536 let status = dispatch_builtin("getopts", args);
537 Value::Status(status)
538 });
539
540 // BUILTIN_AUTOLOAD / BUILTIN_UNFUNCTION wires deleted with their
541 // stubs. `bin_functions` stays — wired to the canonical port.
542 vm.register_builtin(BUILTIN_FUNCTIONS, |vm, argc| {
543 let args = pop_args(vm, argc);
544 let status = dispatch_builtin("functions", args);
545 Value::Status(status)
546 });
547
548 // Traps
549 vm.register_builtin(BUILTIN_TRAP, |vm, argc| {
550 let args = pop_args(vm, argc);
551 let status = dispatch_builtin("trap", args);
552 Value::Status(status)
553 });
554
555 // BUILTIN_PUSHD / BUILTIN_POPD wires deleted with their stubs.
556 // `bin_dirs` stays — wired to the canonical port.
557 vm.register_builtin(BUILTIN_DIRS, |vm, argc| {
558 let args = pop_args(vm, argc);
559 let status = dispatch_builtin("dirs", args);
560 Value::Status(status)
561 });
562
563 // type / whence / where / which all route through `bin_whence`
564 // (canonical port at `src/ported/builtin.rs:3734` of
565 // `Src/builtin.c:3975`). Each gets its own opcode so funcid +
566 // defopts come from the BUILTINS table entry — execbuiltin
567 // applies them correctly via the module-level dispatch_builtin.
568 vm.register_builtin(BUILTIN_WHENCE, |vm, argc| {
569 let args = pop_args(vm, argc);
570 Value::Status(dispatch_builtin("whence", args))
571 });
572 vm.register_builtin(BUILTIN_TYPE, |vm, argc| {
573 let args = pop_args(vm, argc);
574 Value::Status(dispatch_builtin("type", args))
575 });
576 vm.register_builtin(BUILTIN_WHICH, |vm, argc| {
577 let args = pop_args(vm, argc);
578 Value::Status(dispatch_builtin("which", args))
579 });
580 vm.register_builtin(BUILTIN_WHERE, |vm, argc| {
581 let args = pop_args(vm, argc);
582 Value::Status(dispatch_builtin("where", args))
583 });
584
585 vm.register_builtin(BUILTIN_HASH, |vm, argc| {
586 let args = pop_args(vm, argc);
587 let status = dispatch_builtin("hash", args);
588 Value::Status(status)
589 });
590
591 vm.register_builtin(BUILTIN_REHASH, |vm, argc| {
592 let args = pop_args(vm, argc);
593 let status = dispatch_builtin("rehash", args);
594 Value::Status(status)
595 });
596
597 // `unhash`/`unalias`/`unfunction` share `bin_unhash` (Src/builtin.c:
598 // c:4350) but each carries its own funcid (BIN_UNHASH /
599 // BIN_UNALIAS / BIN_UNFUNCTION) in the BUILTINS table. Route each
600 // through `execbuiltin` so the correct funcid + optstr propagate
601 // — earlier wiring passed funcid=0 unconditionally and `unalias`
602 // silently no-op'd on the cmdnamtab path.
603 fn unhash_via_execbuiltin(name: &str, args: Vec<String>) -> i32 {
604 let bn_idx = crate::ported::builtin::BUILTINS.iter()
605 .position(|b| b.node.nam == name);
606 if let Some(idx) = bn_idx {
607 let bn_static: &'static crate::ported::zsh_h::builtin =
608 &crate::ported::builtin::BUILTINS[idx];
609 let bn_ptr = bn_static as *const _ as *mut _;
610 crate::ported::builtin::execbuiltin(args, Vec::new(), bn_ptr)
611 } else {
612 1
613 }
614 }
615 vm.register_builtin(BUILTIN_UNHASH, |vm, argc| {
616 let args = pop_args(vm, argc);
617 Value::Status(unhash_via_execbuiltin("unhash", args))
618 });
619 vm.register_builtin(BUILTIN_UNALIAS, |vm, argc| {
620 let args = pop_args(vm, argc);
621 Value::Status(unhash_via_execbuiltin("unalias", args))
622 });
623 vm.register_builtin(BUILTIN_UNFUNCTION, |vm, argc| {
624 let args = pop_args(vm, argc);
625 Value::Status(unhash_via_execbuiltin("unfunction", args))
626 });
627
628 // Completion
629 vm.register_builtin(BUILTIN_COMPGEN, |vm, argc| {
630 let args = pop_args(vm, argc);
631 let status = with_executor(|exec| exec.builtin_compgen(&args));
632 Value::Status(status)
633 });
634
635 vm.register_builtin(BUILTIN_COMPLETE, |vm, argc| {
636 let args = pop_args(vm, argc);
637 let status = with_executor(|exec| exec.builtin_complete(&args));
638 Value::Status(status)
639 });
640
641 vm.register_builtin(BUILTIN_COMPOPT, |vm, argc| {
642 let args = pop_args(vm, argc);
643 let status = with_executor(|exec| exec.builtin_compopt(&args));
644 Value::Status(status)
645 });
646
647 vm.register_builtin(BUILTIN_COMPADD, |vm, argc| {
648 let args = pop_args(vm, argc);
649 let status = dispatch_builtin("compadd", args);
650 Value::Status(status)
651 });
652
653 vm.register_builtin(BUILTIN_COMPSET, |vm, argc| {
654 let args = pop_args(vm, argc);
655 let status = dispatch_builtin("compset", args);
656 Value::Status(status)
657 });
658
659 vm.register_builtin(BUILTIN_COMPDEF, |vm, argc| {
660 let args = pop_args(vm, argc);
661 let status = with_executor(|exec| exec.builtin_compdef(&args));
662 Value::Status(status)
663 });
664
665 vm.register_builtin(BUILTIN_COMPINIT, |vm, argc| {
666 let args = pop_args(vm, argc);
667 let status = with_executor(|exec| exec.builtin_compinit(&args));
668 Value::Status(status)
669 });
670
671 vm.register_builtin(BUILTIN_CDREPLAY, |vm, argc| {
672 let args = pop_args(vm, argc);
673 let status = with_executor(|exec| exec.builtin_cdreplay(&args));
674 Value::Status(status)
675 });
676
677 // Zsh-specific
678 vm.register_builtin(BUILTIN_ZSTYLE, |vm, argc| {
679 let args = pop_args(vm, argc);
680 let status = dispatch_builtin("zstyle", args);
681 Value::Status(status)
682 });
683
684 vm.register_builtin(BUILTIN_ZMODLOAD, |vm, argc| {
685 let args = pop_args(vm, argc);
686 let status = dispatch_builtin("zmodload", args);
687 Value::Status(status)
688 });
689
690 vm.register_builtin(BUILTIN_BINDKEY, |vm, argc| {
691 let args = pop_args(vm, argc);
692 let status = dispatch_builtin("bindkey", args);
693 Value::Status(status)
694 });
695
696 vm.register_builtin(BUILTIN_ZLE, |vm, argc| {
697 let args = pop_args(vm, argc);
698 let status = dispatch_builtin("zle", args);
699 Value::Status(status)
700 });
701
702 vm.register_builtin(BUILTIN_VARED, |vm, argc| {
703 let args = pop_args(vm, argc);
704 let status = dispatch_builtin("vared", args);
705 Value::Status(status)
706 });
707
708 vm.register_builtin(BUILTIN_ZCOMPILE, |vm, argc| {
709 let args = pop_args(vm, argc);
710 let status = with_executor(|exec| exec.bin_zcompile(&args));
711 Value::Status(status)
712 });
713
714 vm.register_builtin(BUILTIN_ZFORMAT, |vm, argc| {
715 let args = pop_args(vm, argc);
716 let status = dispatch_builtin("zformat", args);
717 Value::Status(status)
718 });
719
720 vm.register_builtin(BUILTIN_ZPARSEOPTS, |vm, argc| {
721 let args = pop_args(vm, argc);
722 let status = dispatch_builtin("zparseopts", args);
723 Value::Status(status)
724 });
725
726 vm.register_builtin(BUILTIN_ZREGEXPARSE, |vm, argc| {
727 let args = pop_args(vm, argc);
728 let status = dispatch_builtin("zregexparse", args);
729 Value::Status(status)
730 });
731
732 // Resource limits
733 vm.register_builtin(BUILTIN_ULIMIT, |vm, argc| {
734 let args = pop_args(vm, argc);
735 let status = with_executor(|exec| exec.bin_ulimit(&args));
736 Value::Status(status)
737 });
738
739 vm.register_builtin(BUILTIN_LIMIT, |vm, argc| {
740 let args = pop_args(vm, argc);
741 let status = with_executor(|exec| exec.bin_limit(&args));
742 Value::Status(status)
743 });
744
745 vm.register_builtin(BUILTIN_UNLIMIT, |vm, argc| {
746 let args = pop_args(vm, argc);
747 let status = with_executor(|exec| exec.bin_unlimit(&args));
748 Value::Status(status)
749 });
750
751 vm.register_builtin(BUILTIN_UMASK, |vm, argc| {
752 let args = pop_args(vm, argc);
753 let status = dispatch_builtin("umask", args);
754 Value::Status(status)
755 });
756
757 // Misc
758 vm.register_builtin(BUILTIN_TIMES, |vm, argc| {
759 let args = pop_args(vm, argc);
760 let status = dispatch_builtin("times", args);
761 Value::Status(status)
762 });
763
764 vm.register_builtin(BUILTIN_CALLER, |vm, argc| {
765 let args = pop_args(vm, argc);
766 let status = with_executor(|exec| exec.builtin_caller(&args));
767 Value::Status(status)
768 });
769
770 vm.register_builtin(BUILTIN_HELP, |vm, argc| {
771 let args = pop_args(vm, argc);
772 let status = with_executor(|exec| exec.builtin_help(&args));
773 Value::Status(status)
774 });
775
776 vm.register_builtin(BUILTIN_ENABLE, |vm, argc| {
777 let args = pop_args(vm, argc);
778 let status = dispatch_builtin("enable", args);
779 Value::Status(status)
780 });
781
782 vm.register_builtin(BUILTIN_DISABLE, |vm, argc| {
783 let args = pop_args(vm, argc);
784 let status = dispatch_builtin("disable", args);
785 Value::Status(status)
786 });
787
788 // BUILTIN_NOGLOB wire deleted with its stub.
789
790 vm.register_builtin(BUILTIN_TTYCTL, |vm, argc| {
791 let args = pop_args(vm, argc);
792 let status = dispatch_builtin("ttyctl", args);
793 Value::Status(status)
794 });
795
796 vm.register_builtin(BUILTIN_SYNC, |vm, argc| {
797 let args = pop_args(vm, argc);
798 // Canonical bin_sync per files.c:53 — `sync(); return 0;`.
799 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
800 argscount: 0, argsalloc: 0 };
801 let status = crate::ported::modules::files::bin_sync(
802 "sync", &args, &ops, 0);
803 Value::Status(status)
804 });
805
806 vm.register_builtin(BUILTIN_MKDIR, |vm, argc| {
807 let args = pop_args(vm, argc);
808 // Canonical bin_mkdir wired in BUILTINS table (files.c:63).
809 // execbuiltin handles the "pm:" optstr parsing.
810 Value::Status(dispatch_builtin("mkdir", args))
811 });
812
813 vm.register_builtin(BUILTIN_STRFTIME, |vm, argc| {
814 let args = pop_args(vm, argc);
815 // Canonical bin_strftime takes (nam, argv, ops, func) per
816 // Src/Modules/datetime.c:187. Adapt &[String] → &[&str] +
817 // empty options inline (datetime parses no flags).
818 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
819 argscount: 0, argsalloc: 0 };
820 let argv: Vec<&str> = args.iter().map(String::as_str).collect();
821 let status = crate::ported::modules::datetime::bin_strftime(
822 "strftime", &argv, &ops, 0);
823 Value::Status(status)
824 });
825
826 vm.register_builtin(BUILTIN_ZSLEEP, |vm, argc| {
827 let args = pop_args(vm, argc);
828 let status = crate::extensions::ext_builtins::zsleep(&args);
829 Value::Status(status)
830 });
831
832 vm.register_builtin(BUILTIN_ZSYSTEM, |vm, argc| {
833 let args = pop_args(vm, argc);
834 // bin_zsystem now takes the canonical C signature
835 // (name, args, ops, func) per Src/Modules/system.c:806.
836 let ops = crate::ported::zsh_h::options {
837 ind: [0u8; crate::ported::zsh_h::MAX_OPS],
838 args: Vec::new(), argscount: 0, argsalloc: 0,
839 };
840 let _ = with_executor(|_exec| ());
841 let status = crate::modules::system::bin_zsystem("zsystem", &args, &ops, 0);
842 Value::Status(status)
843 });
844
845 // PCRE
846 vm.register_builtin(BUILTIN_PCRE_COMPILE, |vm, argc| {
847 let args = pop_args(vm, argc);
848 let status = dispatch_builtin("pcre_compile", args);
849 Value::Status(status)
850 });
851
852 vm.register_builtin(BUILTIN_PCRE_MATCH, |vm, argc| {
853 let args = pop_args(vm, argc);
854 let status = dispatch_builtin("pcre_match", args);
855 Value::Status(status)
856 });
857
858 vm.register_builtin(BUILTIN_PCRE_STUDY, |vm, argc| {
859 let args = pop_args(vm, argc);
860 let status = dispatch_builtin("pcre_study", args);
861 Value::Status(status)
862 });
863
864 // Database (GDBM)
865 vm.register_builtin(BUILTIN_ZTIE, |vm, argc| {
866 let args = pop_args(vm, argc);
867 let status = dispatch_builtin("ztie", args);
868 Value::Status(status)
869 });
870
871 vm.register_builtin(BUILTIN_ZUNTIE, |vm, argc| {
872 let args = pop_args(vm, argc);
873 let status = dispatch_builtin("zuntie", args);
874 Value::Status(status)
875 });
876
877 vm.register_builtin(BUILTIN_ZGDBMPATH, |vm, argc| {
878 let args = pop_args(vm, argc);
879 let status = dispatch_builtin("zgdbmpath", args);
880 Value::Status(status)
881 });
882
883 // Prompt
884 vm.register_builtin(BUILTIN_PROMPTINIT, |vm, argc| {
885 let args = pop_args(vm, argc);
886 Value::Status(crate::extensions::ext_builtins::promptinit(&args))
887 });
888
889 vm.register_builtin(BUILTIN_PROMPT, |vm, argc| {
890 let args = pop_args(vm, argc);
891 Value::Status(crate::extensions::ext_builtins::prompt(&args))
892 });
893
894 // Async / Parallel (zshrs extensions)
895 vm.register_builtin(BUILTIN_ASYNC, |vm, argc| {
896 let args = pop_args(vm, argc);
897 let status = with_executor(|exec| exec.builtin_async(&args));
898 Value::Status(status)
899 });
900
901 vm.register_builtin(BUILTIN_AWAIT, |vm, argc| {
902 let args = pop_args(vm, argc);
903 let status = with_executor(|exec| exec.builtin_await(&args));
904 Value::Status(status)
905 });
906
907 vm.register_builtin(BUILTIN_PMAP, |vm, argc| {
908 let args = pop_args(vm, argc);
909 let status = with_executor(|exec| exec.builtin_pmap(&args));
910 Value::Status(status)
911 });
912
913 vm.register_builtin(BUILTIN_PGREP, |vm, argc| {
914 let args = pop_args(vm, argc);
915 let status = with_executor(|exec| exec.builtin_pgrep(&args));
916 Value::Status(status)
917 });
918
919 vm.register_builtin(BUILTIN_PEACH, |vm, argc| {
920 let args = pop_args(vm, argc);
921 let status = with_executor(|exec| exec.builtin_peach(&args));
922 Value::Status(status)
923 });
924
925 vm.register_builtin(BUILTIN_BARRIER, |vm, argc| {
926 let args = pop_args(vm, argc);
927 let status = with_executor(|exec| exec.builtin_barrier(&args));
928 Value::Status(status)
929 });
930
931 // Intercept (AOP)
932 vm.register_builtin(BUILTIN_INTERCEPT, |vm, argc| {
933 let args = pop_args(vm, argc);
934 let status = with_executor(|exec| exec.builtin_intercept(&args));
935 Value::Status(status)
936 });
937
938 vm.register_builtin(BUILTIN_INTERCEPT_PROCEED, |vm, argc| {
939 let args = pop_args(vm, argc);
940 let status = with_executor(|exec| exec.builtin_intercept_proceed(&args));
941 Value::Status(status)
942 });
943
944 // Debug / Profile
945 vm.register_builtin(BUILTIN_DOCTOR, |vm, argc| {
946 let args = pop_args(vm, argc);
947 let status = with_executor(|exec| exec.builtin_doctor(&args));
948 Value::Status(status)
949 });
950
951 vm.register_builtin(BUILTIN_DBVIEW, |vm, argc| {
952 let args = pop_args(vm, argc);
953 let status = with_executor(|exec| exec.builtin_dbview(&args));
954 Value::Status(status)
955 });
956
957 vm.register_builtin(BUILTIN_PROFILE, |vm, argc| {
958 let args = pop_args(vm, argc);
959 let status = with_executor(|exec| exec.builtin_profile(&args));
960 Value::Status(status)
961 });
962
963 vm.register_builtin(BUILTIN_ZPROF, |vm, argc| {
964 let args = pop_args(vm, argc);
965 // bin_zprof now takes the canonical C signature
966 // (name, args, ops, func) per Src/Modules/zprof.c:139.
967 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
968 argscount: 0, argsalloc: 0 };
969 if args.iter().any(|a| a == "-c") { ops.ind[b'c' as usize] = 1; }
970 let _ = with_executor(|_exec| ());
971 let status = crate::modules::zprof::bin_zprof("zprof", &args, &ops, 0);
972 Value::Status(status)
973 });
974
975 // ═══════════════════════════════════════════════════════════════════════
976 // Coreutils builtins (anti-fork, gated by !posix_mode)
977 //
978 // All of these are routinely wrapped by user functions in real
979 // dotfiles (zpwr, oh-my-zsh, etc.) — `cat() { ... }`, `ls() { ... }`,
980 // `find() { ... }`. Each handler MUST consult try_user_fn_override
981 // first (via reg_overridable!) so the user definition wins, matching
982 // zsh's alias → function → builtin dispatch order.
983 // ═══════════════════════════════════════════════════════════════════════
984
// Each line registers one coreutils-style builtin through `reg_overridable!`,
// passing the VM, the opcode constant, the command name as the user types it,
// and the handler identifier.

// File and text utilities.
reg_overridable!(vm, BUILTIN_CAT, "cat", builtin_cat);
reg_overridable!(vm, BUILTIN_HEAD, "head", builtin_head);
reg_overridable!(vm, BUILTIN_TAIL, "tail", builtin_tail);
reg_overridable!(vm, BUILTIN_WC, "wc", builtin_wc);
reg_overridable!(vm, BUILTIN_BASENAME, "basename", builtin_basename);
reg_overridable!(vm, BUILTIN_DIRNAME, "dirname", builtin_dirname);
reg_overridable!(vm, BUILTIN_TOUCH, "touch", builtin_touch);
reg_overridable!(vm, BUILTIN_REALPATH, "realpath", builtin_realpath);
reg_overridable!(vm, BUILTIN_SORT, "sort", builtin_sort);
reg_overridable!(vm, BUILTIN_FIND, "find", builtin_find);
reg_overridable!(vm, BUILTIN_UNIQ, "uniq", builtin_uniq);
reg_overridable!(vm, BUILTIN_CUT, "cut", builtin_cut);
reg_overridable!(vm, BUILTIN_TR, "tr", builtin_tr);
reg_overridable!(vm, BUILTIN_SEQ, "seq", builtin_seq);
reg_overridable!(vm, BUILTIN_REV, "rev", builtin_rev);
reg_overridable!(vm, BUILTIN_TEE, "tee", builtin_tee);
reg_overridable!(vm, BUILTIN_SLEEP, "sleep", builtin_sleep);
reg_overridable!(vm, BUILTIN_WHOAMI, "whoami", builtin_whoami);
reg_overridable!(vm, BUILTIN_ID, "id", builtin_id);

// System-information and miscellaneous utilities.
reg_overridable!(vm, BUILTIN_HOSTNAME, "hostname", builtin_hostname);
reg_overridable!(vm, BUILTIN_UNAME, "uname", builtin_uname);
reg_overridable!(vm, BUILTIN_DATE, "date", builtin_date);
reg_overridable!(vm, BUILTIN_MKTEMP, "mktemp", builtin_mktemp);
1009
1010 // BUILTIN_EXPAND_WORD_RUNTIME (id 281) was a legacy JSON round-trip
1011 // bridge that no chunk emits anymore. The constant + handler are
1012 // removed; the ID stays reserved in the gap before
1013 // BUILTIN_REGISTER_FUNCTION so future remaps don't reuse it.
1014
1015 // Pipeline execution — bytecode-native fork-per-stage. Pops N sub-chunk
1016 // indices, forks N children with stdin/stdout wired through N-1 pipes,
1017 // each child runs its stage's compiled bytecode and exits. Parent waits
1018 // and returns the last stage's status.
1019 //
1020 // Caveats: post-fork in a multi-threaded program, only async-signal-safe
1021 // ops are POSIX-safe. We violate this (running the bytecode VM after fork
1022 // touches mutexes like REGEX_CACHE). In practice, most pipeline stages
1023 // don't touch shared mutex state — externals fork/exec away, builtins do
1024 // pure I/O. Risks are bounded; if a stage does touch a held mutex, the
1025 // child deadlocks.
1026 vm.register_builtin(BUILTIN_RUN_PIPELINE, |vm, argc| {
1027 let n = argc as usize;
1028 if n == 0 {
1029 return Value::Status(0);
1030 }
1031
1032 // Pop N sub-chunk indices (LIFO → reverse to stage order)
1033 let mut indices: Vec<u16> = Vec::with_capacity(n);
1034 for _ in 0..n {
1035 indices.push(vm.pop().to_int() as u16);
1036 }
1037 indices.reverse();
1038
1039 // Clone each stage's sub-chunk
1040 let stages: Vec<fusevm::Chunk> = indices
1041 .iter()
1042 .filter_map(|&i| vm.chunk.sub_chunks.get(i as usize).cloned())
1043 .collect();
1044 if stages.len() != n {
1045 return Value::Status(1);
1046 }
1047
1048 // Single stage — no pipe, just run inline
1049 if n == 1 {
1050 let stage = stages.into_iter().next().unwrap();
1051 crate::fusevm_disasm::maybe_print_stdout("pipeline:single", &stage);
1052 let mut stage_vm = fusevm::VM::new(stage);
1053 register_builtins(&mut stage_vm);
1054 let _ = stage_vm.run();
1055 return Value::Status(stage_vm.last_status);
1056 }
1057
1058 // Build N-1 pipes
1059 let mut pipes: Vec<(i32, i32)> = Vec::with_capacity(n - 1);
1060 for _ in 0..n - 1 {
1061 let mut fds = [0i32; 2];
1062 if unsafe { libc::pipe(fds.as_mut_ptr()) } < 0 {
1063 // Cleanup any pipes we already created
1064 for (r, w) in &pipes {
1065 unsafe {
1066 libc::close(*r);
1067 libc::close(*w);
1068 }
1069 }
1070 return Value::Status(1);
1071 }
1072 pipes.push((fds[0], fds[1]));
1073 }
1074
1075 // zsh runs the LAST stage of a pipeline in the CURRENT shell
1076 // (not a forked child) so a trailing `read x` keeps its
1077 // assignment in the parent. Other shells (bash) fork every
1078 // stage. Honor zsh by leaving stage N-1 inline. Forks the
1079 // first N-1 stages with fork(); runs the last in this process
1080 // with stdin dup2'd to the last pipe's read end and stdout
1081 // restored after.
1082 let last_idx = n - 1;
1083 let stages_vec: Vec<fusevm::Chunk> = stages.into_iter().collect();
1084
1085 let mut child_pids: Vec<libc::pid_t> = Vec::with_capacity(n - 1);
1086 for (i, chunk) in stages_vec.iter().take(last_idx).enumerate() {
1087 match unsafe { libc::fork() } {
1088 -1 => {
1089 // fork failed — kill any children we already started
1090 for pid in &child_pids {
1091 unsafe { libc::kill(*pid, libc::SIGTERM) };
1092 }
1093 for (r, w) in &pipes {
1094 unsafe {
1095 libc::close(*r);
1096 libc::close(*w);
1097 }
1098 }
1099 return Value::Status(1);
1100 }
1101 0 => {
1102 // Reset SIGPIPE to default so a broken-pipe write
1103 // kills the child cleanly instead of triggering a
1104 // Rust println! panic. The parent shell ignores
1105 // SIGPIPE so it can handle EPIPE itself, but child
1106 // pipeline stages should die quietly when their
1107 // downstream stage closes early (e.g. `seq | head -3`).
1108 unsafe {
1109 libc::signal(libc::SIGPIPE, libc::SIG_DFL);
1110 }
1111 // Child: wire stdin from previous pipe's read end
1112 if i > 0 {
1113 unsafe {
1114 libc::dup2(pipes[i - 1].0, libc::STDIN_FILENO);
1115 }
1116 }
1117 // Wire stdout to next pipe's write end
1118 unsafe {
1119 libc::dup2(pipes[i].1, libc::STDOUT_FILENO);
1120 }
1121 // Close all original pipe fds (keeping stdin/stdout dups)
1122 for (r, w) in &pipes {
1123 unsafe {
1124 libc::close(*r);
1125 libc::close(*w);
1126 }
1127 }
1128
1129 // Run this stage's bytecode on a fresh VM
1130 crate::fusevm_disasm::maybe_print_stdout(
1131 &format!("pipeline:child:stage:{i}"),
1132 chunk,
1133 );
1134 let mut stage_vm = fusevm::VM::new(chunk.clone());
1135 register_builtins(&mut stage_vm);
1136 let _ = stage_vm.run();
1137 // Flush any buffered output before exiting
1138 let _ = std::io::stdout().flush();
1139 let _ = std::io::stderr().flush();
1140 std::process::exit(stage_vm.last_status);
1141 }
1142 pid => {
1143 child_pids.push(pid);
1144 }
1145 }
1146 }
1147
1148 // Parent runs the LAST stage inline. Save stdin, dup the last
1149 // pipe's read end onto fd 0, run the chunk, restore stdin.
1150 // Close every other pipe fd so the producer side gets EOF
1151 // when the last upstream stage exits.
1152 let saved_stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1153 if last_idx > 0 {
1154 let read_fd = pipes[last_idx - 1].0;
1155 unsafe {
1156 libc::dup2(read_fd, libc::STDIN_FILENO);
1157 }
1158 }
1159 // Close all pipe fds in the parent now that stdin is wired.
1160 // (Children already have their own copies. The dup2 above
1161 // already gave us a fresh fd 0 if needed.)
1162 for (r, w) in &pipes {
1163 unsafe {
1164 libc::close(*r);
1165 libc::close(*w);
1166 }
1167 }
1168
1169 // Run the last stage's bytecode on a sub-VM with the host
1170 // wired up. The host points back at the executor so reads
1171 // (`read x`) update the parent's variables directly.
1172 let last_stage_status = {
1173 let last_chunk = stages_vec.into_iter().last().unwrap();
1174 crate::fusevm_disasm::maybe_print_stdout("pipeline:last", &last_chunk);
1175 let mut stage_vm = fusevm::VM::new(last_chunk);
1176 register_builtins(&mut stage_vm);
1177 stage_vm.set_shell_host(Box::new(ZshrsHost));
1178 let _ = stage_vm.run();
1179 let _ = std::io::stdout().flush();
1180 let _ = std::io::stderr().flush();
1181 stage_vm.last_status
1182 };
1183
1184 // Restore stdin
1185 if saved_stdin >= 0 {
1186 unsafe {
1187 libc::dup2(saved_stdin, libc::STDIN_FILENO);
1188 libc::close(saved_stdin);
1189 }
1190 }
1191
1192 // Wait for all forked stages, capture per-stage statuses for PIPESTATUS.
1193 let mut pipestatus: Vec<i32> = Vec::with_capacity(n);
1194 for pid in child_pids {
1195 let mut status: i32 = 0;
1196 unsafe {
1197 libc::waitpid(pid, &mut status, 0);
1198 }
1199 let s = if libc::WIFEXITED(status) {
1200 libc::WEXITSTATUS(status)
1201 } else if libc::WIFSIGNALED(status) {
1202 128 + libc::WTERMSIG(status)
1203 } else {
1204 1
1205 };
1206 pipestatus.push(s);
1207 }
1208 // Append the in-parent last-stage status so `pipestatus` ends
1209 // with N entries (one per stage).
1210 pipestatus.push(last_stage_status);
1211 // Pipeline exit status: by default, the LAST stage's status.
1212 // With `setopt pipefail` (or `set -o pipefail`), use the
1213 // first non-zero stage status (so failures earlier in the
1214 // pipeline propagate even if the last stage succeeded).
1215 let pipefail_on =
1216 with_executor(|exec| crate::ported::options::opt_state_get("pipefail").unwrap_or(false));
1217 let last_status = if pipefail_on {
1218 pipestatus
1219 .iter()
1220 .copied()
1221 .rfind(|&s| s != 0)
1222 .or_else(|| pipestatus.last().copied())
1223 .unwrap_or(0)
1224 } else {
1225 *pipestatus.last().unwrap_or(&0)
1226 };
1227
1228 // Populate `pipestatus` (zsh) and `PIPESTATUS` (bash) arrays so
1229 // scripts can inspect per-stage exit codes. Both names are common
1230 // in user code; populating both removes a portability foot-gun.
1231 with_executor(|exec| {
1232 let strs: Vec<String> = pipestatus.iter().map(|s| s.to_string()).collect();
1233 exec.set_array("pipestatus".to_string(), strs.clone());
1234 exec.set_array("PIPESTATUS".to_string(), strs);
1235 });
1236
1237 Value::Status(last_status)
1238 });
1239
1240 // Array→String join. Pops one value; if it's an Array (e.g. from Op::Glob),
1241 // joins string-coerced elements with a single space. Pass-through for
1242 // non-arrays so the op is safe to chain after any String-or-Array producer.
1243 vm.register_builtin(BUILTIN_ARRAY_JOIN, |vm, _argc| {
1244 let val = vm.pop();
1245 match val {
1246 fusevm::Value::Array(items) => {
1247 let parts: Vec<String> = items.iter().map(|v| v.to_str()).collect();
1248 fusevm::Value::str(parts.join(" "))
1249 }
1250 other => other,
1251 }
1252 });
1253
1254 // `cmd &` background execution. Compile_list emits this for any item
1255 // followed by ListOp::Amp: the cmd is compiled into a sub-chunk, its index
1256 // pushed, then this builtin pops the index, looks up the chunk, forks. The
1257 // child detaches via setsid (so SIGINT to the foreground job doesn't kill
1258 // it), runs the bytecode on a fresh VM with builtins re-registered, exits
// with the last status. The parent records the child pid in `$!`,
// registers it through `exec.jobs.add_pid_job` as a Running bare-pid job
// (so a no-args `wait` can synchronize), and returns Status(0) immediately.
// Full JobTable tracking is deferred to Phase G6 — JobTable::add_job
// currently requires a std::process::Child, which a libc::fork doesn't
// produce. Until then, `jobs`/`fg` may not see these pids.
1263 //WARNING FAKE AND MUST BE DELETED
1264 vm.register_builtin(BUILTIN_RUN_BG, |vm, _argc| {
1265 let sub_idx = vm.pop().to_int() as usize;
1266 let chunk = match vm.chunk.sub_chunks.get(sub_idx).cloned() {
1267 Some(c) => c,
1268 None => return Value::Status(1),
1269 };
1270
1271 match unsafe { libc::fork() } {
1272 -1 => Value::Status(1),
1273 0 => {
1274 // Child: detach and run.
1275 unsafe { libc::setsid() };
1276 crate::fusevm_disasm::maybe_print_stdout("background_job", &chunk);
1277 let mut bg_vm = fusevm::VM::new(chunk);
1278 register_builtins(&mut bg_vm);
1279 let _ = bg_vm.run();
1280 let _ = std::io::stdout().flush();
1281 let _ = std::io::stderr().flush();
1282 std::process::exit(bg_vm.last_status);
1283 }
1284 pid => {
1285 // Parent: record the PID into `$!` (most recent
1286 // backgrounded job's pid). zsh exposes this for any
1287 // script that needs `wait $!`. Also register the
1288 // bare-pid job so a no-args `wait` can synchronize.
1289 with_executor(|exec| {
1290 exec.set_scalar("!".to_string(), pid.to_string());
1291 exec.jobs.add_pid_job(
1292 pid,
1293 String::new(),
1294 crate::exec_jobs::JobState::Running,
1295 );
1296 });
1297 Value::Status(0)
1298 }
1299 }
1300 });
1301
1302 // ── Indexed-array storage and access ──────────────────────────────────
1303 //
1304 // Two calling conventions:
1305 // 1. `arr=(a b c)` → push "a", "b", "c", "arr"; CallBuiltin(SET_ARRAY, 4).
1306 // 2. `arr=($(cmd))` → push FlatArray, "arr"; CallBuiltin(SET_ARRAY, 2)
1307 // where FlatArray is a Value::Array of words after BUILTIN_ARRAY_FLATTEN
1308 // + WORD_SPLIT processing.
1309 // Both end with name as the LAST arg. Values may be a single Value::Array
1310 // (in which case we extract its elements) or a sequence of strings.
1311 //WARNING FAKE AND MUST BE DELETED
1312 vm.register_builtin(BUILTIN_SET_ARRAY, |vm, argc| {
1313 let n = argc as usize;
1314 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(n);
1315 for _ in 0..n {
1316 popped.push(vm.pop());
1317 }
1318 popped.reverse();
1319 if popped.is_empty() {
1320 return Value::Status(1);
1321 }
1322 let name = popped.pop().unwrap().to_str();
1323 let mut values: Vec<String> = Vec::new();
1324 for v in popped {
1325 match v {
1326 fusevm::Value::Array(items) => {
1327 for it in items {
1328 values.push(it.to_str());
1329 }
1330 }
1331 other => values.push(other.to_str()),
1332 }
1333 }
1334 let blocked = with_executor(|exec| {
1335 // Refuse to mutate read-only arrays (declare -ra / typeset
1336 // -ra). zsh prints `read-only variable: NAME` and exits 1
1337 // in -c mode. Mirror that fatal behavior.
1338 let is_ro = exec.is_readonly_param(&name);
1339 if is_ro {
1340 eprintln!("zshrs:1: read-only variable: {}", name);
1341 std::process::exit(1);
1342 }
1343 // Two-statement assoc init: `typeset -A m; m=(k v k v ...)`.
1344 if exec.assoc(&name).is_some() {
1345 // zsh: odd number of values -> `bad set of key/value
1346 // pairs for associative array` exit 1, no
1347 // assignment. zshrs's `if let Some(v) = it.next()`
1348 // silently dropped the orphaned key.
1349 if !values.len().is_multiple_of(2) {
1350 eprintln!("zshrs:1: bad set of key/value pairs for associative array");
1351 return true;
1352 }
1353 let mut map: IndexMap<String, String> = IndexMap::new();
1354 let mut it = values.clone().into_iter();
1355 while let Some(k) = it.next() {
1356 if let Some(v) = it.next() {
1357 map.insert(k, v);
1358 }
1359 }
1360 exec.set_assoc(name.clone(), map);
1361 // PFA-SMR aspect: assoc bulk init `h=(k1 v1 k2 v2 ...)`.
1362 // Recorder emits a structured assoc event with the
1363 // ordered (key, value) pairs preserved in
1364 // `value_assoc` so replay can reconstruct the assoc
1365 // exactly — insertion order matters because zsh
1366 // associative arrays are insertion-ordered (via
1367 // IndexMap on the executor side).
1368 #[cfg(feature = "recorder")]
1369 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
1370 let ctx = exec.recorder_ctx();
1371 let attrs = exec.recorder_attrs_for(&name);
1372 let mut pairs: Vec<(String, String)> = Vec::with_capacity(values.len() / 2);
1373 let mut iter = values.iter().cloned();
1374 while let Some(k) = iter.next() {
1375 if let Some(v) = iter.next() {
1376 pairs.push((k, v));
1377 }
1378 }
1379 crate::recorder::emit_assoc_assign(&name, pairs, attrs, false, ctx);
1380 }
1381 return false;
1382 }
1383 // Mirror array→scalar if name is the array side of a typeset -T tie.
1384 // `typeset -U arr` dedupes; first-wins per zsh.
1385 let is_unique = (exec.param_flags(&name) as u32 & crate::ported::zsh_h::PM_UNIQUE) != 0;
1386 if is_unique {
1387 let mut seen = std::collections::HashSet::new();
1388 values.retain(|v| seen.insert(v.clone()));
1389 }
1390 if let Some((scalar_name, sep)) = exec.tied_array_to_scalar.get(&name).cloned() {
1391 let joined = values.join(&sep);
1392 exec.set_scalar(scalar_name, joined);
1393 exec.set_array(name.clone(), values.clone());
1394 } else {
1395 exec.set_array(name.clone(), values.clone());
1396 }
1397 // PFA-SMR aspect: array SET (`name=(...)`). emit_path_or_assign
1398 // routes path-family names to per-element path_mod events
1399 // and everything else to one structured array `assign`
1400 // event with value_array = ordered elements (replay-safe).
1401 #[cfg(feature = "recorder")]
1402 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
1403 let ctx = exec.recorder_ctx();
1404 let attrs = exec.recorder_attrs_for(&name);
1405 emit_path_or_assign(&name, &values, attrs, false, &ctx);
1406 }
1407 false
1408 });
1409 Value::Status(if blocked { 1 } else { 0 })
1410 });
1411 // `arr+=(d e f)` — append. Same calling conventions as SET_ARRAY.
1412 //WARNING FAKE AND MUST BE DELETED
1413 vm.register_builtin(BUILTIN_APPEND_ARRAY, |vm, argc| {
1414 let n = argc as usize;
1415 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(n);
1416 for _ in 0..n {
1417 popped.push(vm.pop());
1418 }
1419 popped.reverse();
1420 if popped.is_empty() {
1421 return Value::Status(1);
1422 }
1423 let name = popped.pop().unwrap().to_str();
1424 let mut values: Vec<String> = Vec::new();
1425 for v in popped {
1426 match v {
1427 fusevm::Value::Array(items) => {
1428 for it in items {
1429 values.push(it.to_str());
1430 }
1431 }
1432 other => values.push(other.to_str()),
1433 }
1434 }
1435 with_executor(|exec| {
1436 // Refuse appends on read-only arrays (declare -ra).
1437 let is_ro = exec.is_readonly_param(&name);
1438 if is_ro {
1439 eprintln!("zshrs:1: read-only variable: {}", name);
1440 std::process::exit(1);
1441 }
1442 // Assoc-aware append: `typeset -A m; m+=(k1 v1 k2 v2 ...)`
1443 // adds key/value pairs. Without this, the values were
1444 // appended to a parallel array and `${m[k]}` lookup missed
1445 // the new keys entirely.
1446 if exec.assoc(&name).is_some() {
1447 let mut map = exec.assoc(&name).unwrap_or_default();
1448 let mut it = values.into_iter();
1449 while let Some(k) = it.next() {
1450 if let Some(v) = it.next() {
1451 map.insert(k, v);
1452 }
1453 }
1454 exec.set_assoc(name, map);
1455 return;
1456 }
1457 // `typeset -U arr` dedupes — append must respect existing
1458 // elements too. Skip values that are already present.
1459 // PFA-SMR aspect: array APPEND (`name+=(...)`). Same
1460 // routing as SET_ARRAY but with is_append=true so the
1461 // event carries the APPEND attr bit for replay.
1462 #[cfg(feature = "recorder")]
1463 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
1464 let ctx = exec.recorder_ctx();
1465 let attrs = exec.recorder_attrs_for(&name);
1466 emit_path_or_assign(&name, &values, attrs, true, &ctx);
1467 }
1468 let is_unique = (exec.param_flags(&name) as u32 & crate::ported::zsh_h::PM_UNIQUE) != 0;
1469 // Mirror the post-append result back to a tied scalar
1470 // (`typeset -T PATH path :` — `path+=(/x)` must update
1471 // `PATH` too). Without this, zinit / OMZ patterns like
1472 // `path+=(/some/dir)` left $PATH stale, so `command -v`
1473 // / pathprog lookups missed newly-added dirs.
1474 let tied_scalar = exec.tied_array_to_scalar.get(&name).cloned();
1475 // Read current via canonical exec.array (paramtab-first),
1476 // mutate, then write back via set_array which writes both
1477 // paramtab and the legacy cache.
1478 let mut target = exec.array(&name).unwrap_or_default();
1479 if is_unique {
1480 let existing: std::collections::HashSet<String> = target.iter().cloned().collect();
1481 for v in values {
1482 if !existing.contains(&v) {
1483 target.push(v);
1484 }
1485 }
1486 } else {
1487 target.extend(values);
1488 }
1489 exec.set_array(name.clone(), target);
1490 if let Some((scalar_name, sep)) = tied_scalar {
1491 let joined = exec
1492 .array(&name)
1493 .map(|a| a.join(&sep))
1494 .unwrap_or_default();
1495 exec.set_scalar(scalar_name.clone(), joined.clone());
1496 // Keep the env var (PATH / FPATH / MANPATH / …) in
1497 // sync with the scalar so child processes see the
1498 // change.
1499 std::env::set_var(&scalar_name, &joined);
1500 }
1501 });
1502 Value::Status(0)
1503 });
1504
1505 // `select var in words; do body; done` — interactive menu loop. Stack
1506 // discipline (top-down): sub_chunk_idx (Int), var_name (str), word_N..word_1.
1507 // Argc = words_count + 2. We pop in reverse order: idx first, then name,
1508 // then words back to source order via reverse().
1509 //
1510 // Loop body:
1511 // 1. Print numbered menu to stderr.
1512 // 2. Print PROMPT3 (default "?# ") to stderr.
1513 // 3. Read line from stdin.
1514 // 4. EOF (read fails) → break, return Status(0).
1515 // 5. Empty line → redraw menu, loop.
1516 // 6. Numeric input in 1..=N → set var, run sub-chunk, capture status,
1517 // redraw menu, loop.
1518 // 7. Anything else → set var to "" (zsh convention), run sub-chunk,
1519 // redraw menu, loop. The body sees REPLY = the raw input.
1520 //
// Loop control: the body runs in a fresh VM on every iteration, so the
// chunk-level break_patches mechanism cannot carry `break` / `continue`
// out of the body. Instead the body signals through the executor's
// `loop_signal` (LoopSignal::Break / LoopSignal::Continue), which is
// cleared before each body run and drained right after it. A
// `BREAK_SELECT` scalar that is non-empty and not "0" is also honored as
// a legacy break sentinel; it is unset after every body run. zsh's
// `break` works in select via the same loop-control mechanism as
// for/while.
1530 //WARNING FAKE AND MUST BE DELETED
1531 vm.register_builtin(BUILTIN_RUN_SELECT, |vm, argc| {
1532
1533 if argc < 2 {
1534 return Value::Status(1);
1535 }
1536 let n = argc as usize;
1537 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(n);
1538 for _ in 0..n {
1539 popped.push(vm.pop());
1540 }
1541 // popped: [sub_idx, name, word_N, ..., word_1] (popping from top)
1542 let sub_idx_val = popped.remove(0);
1543 let name_val = popped.remove(0);
1544 let mut words: Vec<String> = popped.into_iter().rev().map(|v| v.to_str()).collect();
1545 // Flatten any Value::Array elements (e.g. `select x in $arr; ...`).
1546 let mut flat = Vec::with_capacity(words.len());
1547 for w in words.drain(..) {
1548 // The pop above already to_str()'d, so Array splice is lost. Re-
1549 // pop wouldn't help — the host receives flat strings here. This is
1550 // OK for now since the compile path uses ARRAY_FLATTEN-equivalent
1551 // reasoning before the call. If splice support is needed, the
1552 // compile path should call BUILTIN_ARRAY_FLATTEN first.
1553 flat.push(w);
1554 }
1555 let words = flat;
1556
1557 let sub_idx = sub_idx_val.to_int() as usize;
1558 let name = name_val.to_str();
1559 let chunk = match vm.chunk.sub_chunks.get(sub_idx).cloned() {
1560 Some(c) => c,
1561 None => return Value::Status(1),
1562 };
1563
1564 let prompt = with_executor(|exec| {
1565 exec.scalar("PROMPT3")
1566 .unwrap_or_else(|| "?# ".to_string())
1567 });
1568
1569 let stdin = std::io::stdin();
1570 let mut reader = stdin.lock();
1571 let mut last_status: i32 = 0;
1572
1573 loop {
1574 // Direct port of zsh's selectlist from
1575 // src/zsh/Src/loop.c:347-409. Layout is column-major
1576 // ("down columns, then across") — NOT row-major. With
1577 // 6 items in 3 cols zsh produces:
1578 // 1 3 5
1579 // 2 4 6
1580 // The previous Rust impl walked row-major which
1581 // produced 1 2 3 / 4 5 6 (visually similar but wrong
1582 // for prompts that mention ordering and breaks scripts
1583 // that rely on column count == ceil(N/rows)).
1584 //
1585 // C variable mapping:
1586 // ct -> word count (n)
1587 // longest -> max item width + 1, then plus digits-of-ct
1588 // fct -> column count
1589 // fw -> per-column width
1590 // colsz -> row count = ceil(ct / fct)
1591 // t1 -> row index, walks 0..colsz
1592 // ap -> item pointer; advances by colsz to step
1593 // DOWN a column.
1594 let term_width: usize = std::env::var("COLUMNS")
1595 .ok()
1596 .and_then(|v| v.parse().ok())
1597 .unwrap_or(80);
1598 let ct = words.len();
1599 // loop.c:354-363 — find longest item width.
1600 let mut longest = 1usize;
1601 for w in &words {
1602 let aplen = w.chars().count();
1603 if aplen > longest {
1604 longest = aplen;
1605 }
1606 }
1607 // loop.c:365-367 — `longest++` then add digits of `ct`.
1608 longest += 1;
1609 let mut t0 = ct;
1610 while t0 > 0 {
1611 t0 /= 10;
1612 longest += 1;
1613 }
1614 // loop.c:369-373 — fct = (cols - 1) / (longest + 3); if
1615 // 0, fct = 1; else fw = (cols - 1) / fct.
1616 let raw_fct = (term_width.saturating_sub(1)) / (longest + 3);
1617 let (fct, fw) = if raw_fct == 0 {
1618 (1, longest + 3)
1619 } else {
1620 (raw_fct, (term_width.saturating_sub(1)) / raw_fct)
1621 };
1622 // loop.c:374 — colsz = (ct + fct - 1) / fct.
1623 let colsz = ct.div_ceil(fct);
1624 // loop.c:375-395 — for each row t1, walk down columns.
1625 for t1 in 0..colsz {
1626 let mut ap_idx = t1;
1627 while ap_idx < ct {
1628 let w = &words[ap_idx];
1629 let n = ap_idx + 1;
1630 let _ = write!(std::io::stderr(), "{}) {}", n, w);
1631 let mut t2 = w.chars().count() + 2;
1632 let mut t3 = n;
1633 while t3 > 0 {
1634 t2 += 1;
1635 t3 /= 10;
1636 }
1637 // Pad to fw (loop.c:389-390).
1638 while t2 < fw {
1639 let _ = write!(std::io::stderr(), " ");
1640 t2 += 1;
1641 }
1642 ap_idx += colsz;
1643 }
1644 let _ = writeln!(std::io::stderr());
1645 }
1646 let _ = write!(std::io::stderr(), "{}", prompt);
1647 let _ = std::io::stderr().flush();
1648
1649 let mut line = String::new();
1650 match reader.read_line(&mut line) {
1651 Ok(0) => break, // EOF
1652 Ok(_) => {}
1653 Err(_) => break,
1654 }
1655 let trimmed = line.trim_end_matches(['\n', '\r'][..].as_ref()).to_string();
1656
1657 with_executor(|exec| {
1658 exec.set_scalar("REPLY".to_string(), trimmed.clone());
1659 });
1660
1661 if trimmed.is_empty() {
1662 // Empty input → redraw menu without running body.
1663 continue;
1664 }
1665
1666 let chosen = match trimmed.parse::<usize>() {
1667 Ok(n) if n >= 1 && n <= words.len() => words[n - 1].clone(),
1668 _ => String::new(),
1669 };
1670
1671 with_executor(|exec| {
1672 exec.set_scalar(name.clone(), chosen);
1673 });
1674
1675 // Reset the loop signal before running the body so a stale
1676 // value from a sibling construct doesn't leak in.
1677 with_executor(|exec| exec.loop_signal = None);
1678
1679 crate::fusevm_disasm::maybe_print_stdout("select:body", &chunk);
1680 let mut body_vm = fusevm::VM::new(chunk.clone());
1681 register_builtins(&mut body_vm);
1682 let _ = body_vm.run();
1683 last_status = body_vm.last_status;
1684
1685 // Drain the cross-VM loop-control signal. `break` from inside
1686 // the body sets LoopSignal::Break; `continue` sets Continue.
1687 // The legacy `BREAK_SELECT=1` env-var sentinel is still honored
1688 // for backward compat with scripts written before the keyword
1689 // path landed.
1690 let signal = with_executor(|exec| exec.loop_signal.take());
1691 let break_legacy = with_executor(|exec| {
1692 let v = exec.scalar("BREAK_SELECT");
1693 exec.unset_scalar("BREAK_SELECT");
1694 v.map(|s| s != "0" && !s.is_empty()).unwrap_or(false)
1695 });
1696 match signal {
1697 Some(LoopSignal::Break) => break,
1698 Some(LoopSignal::Continue) => continue,
1699 None if break_legacy => break,
1700 None => {}
1701 }
1702 }
1703
1704 Value::Status(last_status)
1705 });
1706
1707 // Magic special-parameter assoc lookup. Synthesizes values from
1708 // shell state for zsh's shell-introspection assocs:
1709 // commands, aliases, galiases, saliases, dis_aliases, dis_galiases,
1710 // dis_saliases, functions, dis_functions, builtins, dis_builtins,
1711 // reswords, options, parameters, jobtexts, jobdirs, jobstates,
1712 // nameddirs, userdirs, modules.
1713 // Returns None if `name` isn't a recognized magic name.
1714 //WARNING FAKE AND MUST BE DELETED
1715 fn magic_assoc_lookup(name: &str, idx: &str) -> Option<Value> {
1716 // Subscript-flag lookup `(r)pat` / `(R)pat` / `(i)pat` /
1717 // `(I)pat` on a magic-assoc — synthesize the (key,value)
1718 // pair list from get_special_array_value and route through
1719 // the assoc-flag matcher (same path real assocs use).
1720 // Direct port of Src/params.c getarg's hash-aware index/
1721 // match handling — without this, `${aliases[(I)foo*]}` and
1722 // friends were passing the literal `(I)foo*` text through
1723 // as the key.
1724 // Magic-assoc subscript flags (I)/(R)/(i)/(r): parse the
1725 // leading `(...)` flag tag and dispatch by-key (I/i) or
1726 // by-value (R/r) glob match. Capital = return all matches
1727 // joined by space; lowercase = return first only.
1728 // Direct port of Src/params.c getarg path which routes
1729 // hash subscripts through pattern matching when the flag
1730 // tag is present.
1731 let parsed_flags: Option<(String, String)> = (|s: &str| {
1732 let s = s.trim_start();
1733 let rest = s.strip_prefix('(')?;
1734 let close = rest.find(')')?;
1735 let flags = rest[..close].to_string();
1736 let pat = rest[close + 1..].to_string();
1737 if flags.chars().next().is_some_and(|c| matches!(c, 'I' | 'R' | 'i' | 'r' | 'k' | 'K' | 'n' | 'e' | 'b' | 'w' | 'f' | 'p' | 's')) {
1738 Some((flags, pat))
1739 } else { None }
1740 })(idx);
1741 if let Some((flags, pat)) = parsed_flags.clone() {
1742 let pairs = with_executor(|exec| -> Option<Vec<(String, String)>> {
1743 let keys = crate::exec::scan_magic_assoc_keys(name)?;
1744 Some(keys
1745 .into_iter()
1746 .map(|k| {
1747 let v = exec
1748 .get_special_array_value(name, &k)
1749 .unwrap_or_default();
1750 (k, v)
1751 })
1752 .collect())
1753 });
1754 if let Some(pairs) = pairs {
1755 let by_key = flags.contains('I') || flags.contains('i');
1756 let return_all = flags.contains('I') || flags.contains('R');
1757 let mut out: Vec<String> = Vec::new();
1758 for (k, v) in &pairs {
1759 let hay = if by_key { k } else { v };
1760 if crate::exec::glob_match_static(hay, &pat) {
1761 out.push(if by_key { k.clone() } else { v.clone() });
1762 if !return_all { break; }
1763 }
1764 }
1765 return Some(Value::str(out.join(" ")));
1766 }
1767 }
1768 with_executor(|exec| -> Option<Value> {
1769 match name {
1770 "commands" => {
1771 // Canonical command-hash is `cmdnamtab` — HASHED
1772 // entries store their resolved path in `cmd`.
1773 let tab = crate::ported::hashtable::cmdnamtab_lock();
1774 if idx == "@" || idx == "*" {
1775 return Some(Value::Array(
1776 tab.read().ok()
1777 .map(|t| t.iter()
1778 .filter_map(|(_, c)| c.cmd.clone())
1779 .map(Value::str)
1780 .collect())
1781 .unwrap_or_default(),
1782 ));
1783 }
1784 Some(Value::str(
1785 tab.read().ok()
1786 .and_then(|t| t.get_full_path(idx).map(|p| p.display().to_string()))
1787 .unwrap_or_else(|| {
1788 // Fall back to PATH scan for first match
1789 for dir in env::var("PATH").unwrap_or_default().split(':') {
1790 let p = std::path::PathBuf::from(dir).join(idx);
1791 if p.is_file() {
1792 return p.to_string_lossy().into_owned();
1793 }
1794 }
1795 String::new()
1796 }),
1797 ))
1798 }
1799 "aliases" | "galiases" | "saliases" => Some(Value::str(
1800 exec.get_special_array_value(name, idx).unwrap_or_default(),
1801 )),
1802 "functions" => {
1803 if let Some(text) = exec.function_definition_text(idx) {
1804 // zsh's `$functions[name]` returns the function
1805 // body with each statement on its own line and a
1806 // leading TAB on every line (no trailing `;`).
1807 // Was returning the raw user-typed source which
1808 // diverges on indent and terminator. Direct port
1809 // of Src/exec.c's `getfn_functions` formatter.
1810 let formatted = FuncBodyFmt::render(text.trim());
1811 Some(Value::str(format!("\t{}", formatted)))
1812 } else {
1813 Some(Value::str(""))
1814 }
1815 }
1816 "dis_functions" => {
1817 // Disabled functions table — zshrs tracks via autoload_pending
1818 // for the autoload-but-not-loaded case; full disable list
1819 // would need a separate table. For now: empty unless
1820 // explicitly disabled.
1821 Some(Value::str(""))
1822 }
1823 "builtins" => {
1824 // Return "defined" for known builtins; empty for unknown
1825 let known = matches!(
1826 idx,
1827 "echo"
1828 | "print"
1829 | "printf"
1830 | "cd"
1831 | "pwd"
1832 | "exit"
1833 | "return"
1834 | "true"
1835 | "false"
1836 | ":"
1837 | "test"
1838 | "["
1839 | "local"
1840 | "private"
1841 | "declare"
1842 | "typeset"
1843 | "read"
1844 | "shift"
1845 | "eval"
1846 | "alias"
1847 | "unalias"
1848 | "set"
1849 | "unset"
1850 | "export"
1851 | "source"
1852 | "."
1853 | "history"
1854 | "fc"
1855 | "jobs"
1856 | "fg"
1857 | "bg"
1858 | "kill"
1859 | "wait"
1860 | "trap"
1861 | "ulimit"
1862 | "umask"
1863 | "hash"
1864 | "unhash"
1865 | "type"
1866 | "whence"
1867 | "which"
1868 | "where"
1869 | "command"
1870 | "builtin"
1871 | "exec"
1872 | "getopts"
1873 | "let"
1874 | "setopt"
1875 | "unsetopt"
1876 | "emulate"
1877 | "zstyle"
1878 | "compdef"
1879 | "compadd"
1880 | "compinit"
1881 | "compset"
1882 );
1883 if known {
1884 Some(Value::str("defined"))
1885 } else {
1886 Some(Value::str(""))
1887 }
1888 }
1889 "reswords" => {
1890 let known = matches!(
1891 idx,
1892 "if" | "then"
1893 | "elif"
1894 | "else"
1895 | "fi"
1896 | "for"
1897 | "do"
1898 | "done"
1899 | "while"
1900 | "until"
1901 | "case"
1902 | "esac"
1903 | "in"
1904 | "function"
1905 | "select"
1906 | "time"
1907 | "{"
1908 | "}"
1909 | "[["
1910 | "]]"
1911 | "!"
1912 | "coproc"
1913 | "always"
1914 | "foreach"
1915 | "end"
1916 | "repeat"
1917 | "nocorrect"
1918 | "noglob"
1919 | "declare"
1920 | "typeset"
1921 | "local"
1922 | "readonly"
1923 | "export"
1924 | "integer"
1925 | "float"
1926 );
1927 if known {
1928 Some(Value::str("reserved"))
1929 } else {
1930 Some(Value::str(""))
1931 }
1932 }
1933 "options" => {
1934 let opt_name = idx.to_lowercase().replace('_', "");
1935 Some(Value::str(
1936 if crate::ported::options::opt_state_get(&opt_name).unwrap_or(false) {
1937 "on"
1938 } else {
1939 "off"
1940 },
1941 ))
1942 }
1943 "parameters" => {
1944 // ${parameters[name]} returns the type with all
1945 // attributes joined by `-`. Delegates to
1946 // `get_special_array_value` which reads PM_TYPE
1947 // / PM_LOWER / PM_READONLY / etc. flags from the
1948 // canonical paramtab entry.
1949 Some(Value::str(
1950 exec.get_special_array_value("parameters", idx)
1951 .unwrap_or_default()))
1952 }
1953 "jobtexts" => {
1954 let job_id: usize = idx.parse().ok()?;
1955 Some(Value::str(
1956 exec.jobs
1957 .get(job_id)
1958 .map(|j| j.command.clone())
1959 .unwrap_or_default(),
1960 ))
1961 }
1962 "jobdirs" => {
1963 let _job_id: usize = idx.parse().ok()?;
1964 // Per-job working dir not tracked; return current cwd as
1965 // a useful approximation (zsh tracks it; we don't yet).
1966 Some(Value::str(
1967 std::env::current_dir()
1968 .ok()
1969 .and_then(|p| p.to_str().map(String::from))
1970 .unwrap_or_default(),
1971 ))
1972 }
1973 "jobstates" => {
1974 let job_id: usize = idx.parse().ok()?;
1975 Some(Value::str(
1976 exec.jobs
1977 .get(job_id)
1978 .map(|j| match j.state {
1979 JobState::Running => "running".to_string(),
1980 JobState::Stopped => "stopped".to_string(),
1981 JobState::Done => "done".to_string(),
1982 })
1983 .unwrap_or_default(),
1984 ))
1985 }
1986 "nameddirs" => Some(Value::str(
1987 crate::ported::hashnameddir::nameddirtab()
1988 .lock().ok()
1989 .and_then(|g| g.get(idx).map(|nd| nd.dir.clone()))
1990 .unwrap_or_default(),
1991 )),
1992 //WARNING FAKE AND MUST BE DELETED
1993 "userdirs" => {
1994 // ~user → home dir lookup via /etc/passwd. No caching;
1995 // each lookup hits getpwnam.
1996 let c_user = match std::ffi::CString::new(idx) {
1997 Ok(c) => c,
1998 Err(_) => return Some(Value::str("")),
1999 };
2000 let pw = unsafe { libc::getpwnam(c_user.as_ptr()) };
2001 if pw.is_null() {
2002 Some(Value::str(""))
2003 } else {
2004 let home_ptr = unsafe { (*pw).pw_dir };
2005 if home_ptr.is_null() {
2006 return Some(Value::str(""));
2007 }
2008 let home = unsafe { std::ffi::CStr::from_ptr(home_ptr) };
2009 Some(Value::str(home.to_string_lossy().into_owned()))
2010 }
2011 }
2012 "modules" => {
2013 // Loaded modules — compiled-in always-loaded plus
2014 // anything zmodload registered via the
2015 // `_module_<name>` option flag (see
2016 // bin_zmodload). Same source as the
2017 // magic_assoc_lookup path so both `${modules[X]}`
2018 // and `${(t)modules[X]}` agree.
2019 const ALWAYS_LOADED: &[&str] = &[
2020 "zsh/datetime",
2021 "zsh/sched",
2022 "zsh/zutil",
2023 "zsh/parameter",
2024 "zsh/files",
2025 "zsh/complete",
2026 "zsh/complist",
2027 "zsh/regex",
2028 "zsh/system",
2029 "zsh/stat",
2030 "zsh/net/tcp",
2031 "zsh/net/socket",
2032 "zsh/private",
2033 "zsh/zftp",
2034 "zsh/zselect",
2035 "zsh/zle",
2036 "zsh/random",
2037 "zsh/pcre",
2038 "zsh/db/gdbm",
2039 "zsh/cap",
2040 "zsh/clone",
2041 "zsh/curses",
2042 "zsh/mapfile",
2043 "zsh/nearcolor",
2044 "zsh/newuser",
2045 "zsh/mathfunc",
2046 "zsh/termcap",
2047 "zsh/terminfo",
2048 "zsh/profiler",
2049 ];
2050 let loaded = ALWAYS_LOADED.contains(&idx)
2051 || crate::ported::options::opt_state_get(&format!("_module_{}", idx))
2052 .unwrap_or(false);
2053 Some(Value::str(if loaded { "loaded" } else { "" }))
2054 }
2055 //WARNING FAKE AND MUST BE DELETED
2056 "patchars" => Some(Value::str("*?[]<>(){}|^&;")),
2057 "widgets" => {
2058 // ${widgets[name]} → 'builtin' or 'user:func' per
2059 // zleparameter.c widgets_*. Mirrors the
2060 // magic_assoc_lookup path so both lookup sites
2061 // agree.
2062 if let Some(target) = getwidgettarget(idx) {
2063 if target == idx {
2064 Some(Value::str("builtin"))
2065 } else {
2066 Some(Value::str(format!("user:{}", target)))
2067 }
2068 } else {
2069 Some(Value::str(""))
2070 }
2071 }
2072 "keymaps" => {
2073 // ${keymaps[name]} → "1" or "" per zleparameter.c
2074 // keymaps_*. Same canonical seven names as the
2075 // magic_assoc path.
2076 let known = matches!(
2077 idx,
2078 "main" | "emacs" | "viins" | "vicmd" | "isearch" | "command" | "menuselect"
2079 );
2080 if known {
2081 Some(Value::str("1"))
2082 } else {
2083 Some(Value::str(""))
2084 }
2085 }
2086 "mapfile" => {
2087 // zsh/mapfile module: `${mapfile[/path]}` reads a
2088 // file's bytes verbatim. Trailing newline is
2089 // preserved (verified against real zsh: a one-line
2090 // "test\n" file gives len=5, not 4). Downstream
2091 // (f)/(@f) flags handle the trailing-newline split.
2092 if idx == "@" || idx == "*" {
2093 // Splice: not meaningful for mapfile (the whole
2094 // filesystem isn't enumerable). Return empty.
2095 return Some(Value::Array(vec![]));
2096 }
2097 match std::fs::read_to_string(idx) {
2098 Ok(s) => Some(Value::str(s)),
2099 Err(_) => Some(Value::str("")),
2100 }
2101 }
2102 "sysparams" => {
2103 // zsh/system module: `${sysparams[KEY]}` magic
2104 // assoc with three keys per zshmodules(1): `pid`,
2105 // `ppid`, `procsubstpid`. Returns the appropriate
2106 // process ID. Splice form returns the value list.
2107 let pid_str = std::process::id().to_string();
2108 let ppid_str = unsafe { libc::getppid() }.to_string();
2109 if idx == "@" || idx == "*" {
2110 return Some(Value::Array(vec![
2111 Value::str(pid_str),
2112 Value::str(ppid_str),
2113 ]));
2114 }
2115 match idx {
2116 "pid" => Some(Value::str(pid_str)),
2117 "ppid" => Some(Value::str(ppid_str)),
2118 "procsubstpid" => Some(Value::str("0")),
2119 _ => Some(Value::str("")),
2120 }
2121 }
2122 "epochtime" => {
2123 // zsh/datetime — `${epochtime}` is a 2-element
2124 // indexed array: [seconds, nanoseconds] from
2125 // clock_gettime(CLOCK_REALTIME). Direct port of
2126 // the `epochtimegetfn` accessor in
2127 // Src/Modules/datetime.c (struct gsu_array).
2128 let (secs, nsecs) = SystemTime::now()
2129 .duration_since(UNIX_EPOCH)
2130 .map(|d| (d.as_secs() as i64, d.subsec_nanos() as i64))
2131 .unwrap_or((0, 0));
2132 if idx == "@" || idx == "*" {
2133 return Some(Value::Array(vec![
2134 Value::str(secs.to_string()),
2135 Value::str(nsecs.to_string()),
2136 ]));
2137 }
2138 if let Ok(n) = idx.parse::<i64>() {
2139 let pos = if n > 0 {
2140 (n - 1) as usize
2141 } else if n < 0 {
2142 let p = 2 + n;
2143 if p < 0 {
2144 return Some(Value::str(""));
2145 }
2146 p as usize
2147 } else {
2148 return Some(Value::str(""));
2149 };
2150 return match pos {
2151 0 => Some(Value::str(secs.to_string())),
2152 1 => Some(Value::str(nsecs.to_string())),
2153 _ => Some(Value::str("")),
2154 };
2155 }
2156 Some(Value::str(""))
2157 }
2158 "termcap" => {
2159 // `${termcap[cap]}` — direct port of
2160 // `gettermcap()` from Src/Modules/termcap.c:144.
2161 // Backed by ncurses' termcap-emulation API
2162 // (`tgetent`/`tgetstr`/`tgetnum`/`tgetflag`)
2163 // which resolves from the same database
2164 // `${terminfo[…]}` uses but with the legacy
2165 // 2-letter cap names.
2166 Some(Value::str(
2167 crate::modules::termcap::gettermcap(idx).unwrap_or_default(),
2168 ))
2169 }
2170 "terminfo" => {
2171 // `${terminfo[capname]}` — direct port of
2172 // `getterminfo()` from Src/Modules/terminfo.c:135.
2173 // Lazy ncurses tigetstr/tigetnum/tigetflag lookup
2174 // for any capability the script names. The
2175 // executor also pre-seeds the common subset
2176 // into `assoc_arrays["terminfo"]` so
2177 // `${(k)terminfo}` enumerates the seeded names —
2178 // but the magic-assoc path runs FIRST (per the
2179 // `user_defined_assoc` gate at line 3108), so
2180 // for INDEX lookups we always reach `lookup()`
2181 // and uncommon caps like `bel` resolve correctly.
2182 Some(Value::str(
2183 crate::modules::terminfo::getterminfo(idx).unwrap_or_default(),
2184 ))
2185 }
2186 "errnos" => {
2187 // zsh/system module: `${errnos[N]}` is an INDEXED
2188 // array of errno-name strings, 1-based. Direct
2189 // port of the `SPECIALPMDEF("errnos", PM_ARRAY|
2190 // PM_READONLY, …)` entry at
2191 // Src/Modules/system.c:902 + the `errnosgetfn`
2192 // accessor at line 832 (which returns
2193 // `arrdup((char **)sys_errnames)`). Splice (`@`/
2194 // `*`) returns the whole platform-specific list
2195 // as a Value::Array; numeric subscript returns
2196 // the matching name (or "" for unknown).
2197 let table = crate::modules::system::ERRNO_NAMES;
2198 if idx == "@" || idx == "*" {
2199 return Some(Value::Array(
2200 table.iter().map(|(n, _)| Value::str(*n)).collect(),
2201 ));
2202 }
2203 if let Ok(n) = idx.parse::<i64>() {
2204 // 1-based. Negative indices count from end.
2205 let len = table.len() as i64;
2206 let pos = if n > 0 {
2207 (n - 1) as usize
2208 } else if n < 0 {
2209 let p = len + n;
2210 if p < 0 {
2211 return Some(Value::str(""));
2212 }
2213 p as usize
2214 } else {
2215 return Some(Value::str(""));
2216 };
2217 if let Some((name, _)) = table.get(pos) {
2218 return Some(Value::str(*name));
2219 }
2220 }
2221 Some(Value::str(""))
2222 }
2223 // `langinfo` — port of zsh/langinfo module
2224 // (src/zsh/Src/Modules/langinfo.c:402-449). Read-
2225 // only assoc keyed by nl_item names (CODESET,
2226 // D_FMT, RADIXCHAR, etc.); each lookup goes through
2227 // nl_langinfo(3). Splice (`@`/`*`) returns all the
2228 // names known to the module's static table.
2229 "langinfo" => {
2230 if idx == "@" || idx == "*" {
2231 return Some(Value::Array(
2232 crate::langinfo::NL_NAMES
2233 .iter()
2234 .map(|s| Value::str(*s))
2235 .collect(),
2236 ));
2237 }
2238 let val = crate::langinfo::getlanginfo(idx).unwrap_or_default();
2239 Some(Value::str(val))
2240 }
2241 // `.zle.esc` and `.zle.sgr` — port of zsh/hlgroup
2242 // module (src/zsh/Src/Modules/hlgroup.c:81-165).
2243 // Both back into the user's `.zle.hlgroups` assoc.
2244 // `.zle.esc[name]` returns the FULL escape sequence
2245 // for the highlight-group; `.zle.sgr[name]` returns
2246 // just the digit run (after stripping `\033[` and
2247 // trailing `m`). hlgroup.c:39-78 convertattr does
2248 // both modes.
2249 ".zle.esc" | ".zle.sgr" => {
2250 let sgr = name == ".zle.sgr";
2251 // Look up `.zle.hlgroups[idx]` — the user's
2252 // attribute string per hlgroup.c:96-99 (var =
2253 // GROUPVAR i.e. ".zle.hlgroups").
2254 let attr = exec
2255 .assoc(".zle.hlgroups")
2256 .and_then(|m| m.get(idx).cloned())
2257 .unwrap_or_default();
2258 if attr.is_empty() {
2259 // Per hlgroup.c:101-103, missing/unset entry
2260 // returns an empty string (PM_UNSET).
2261 return Some(Value::str(""));
2262 }
2263 let converted = crate::hlgroup::convertattr(&attr, sgr);
2264 Some(Value::str(converted))
2265 }
2266 _ => None,
2267 }
2268 })
2269 }
2270
2271 // `${arr[idx]}` — pop name, then idx_str. zsh is 1-based for positive
2272 // indices; we honor that. `@`/`*` return the whole array as Value::Array
2273 // so Op::Exec splice produces N argv slots. For `${foo[key]}` where foo
2274 // is an assoc, the idx is a string key — we check assoc_arrays first
2275 // when the idx isn't `@`/`*` and the name has an assoc binding.
2276 // WARNING FAKE AND MUST BE DELETED
2277 vm.register_builtin(BUILTIN_ARRAY_INDEX, |vm, _argc| {
2278 let mut idx = vm.pop().to_str();
2279 let name = vm.pop().to_str();
2280 // `\u{02}` prefix on idx = "compile-time DQ context" — set by
2281 // the compile_zsh fast path when the ${arr[KEY]} appeared
2282 // inside `"…"`. The runtime needs this to decide whether
2283 // a `[N,M]` range slice should join (DQ) or stay as array
2284 // (unquoted). The mode-1 BUILTIN_EXPAND_TEXT bridge already
2285 // bumps `exec.in_dq_context`, so detect either signal.
2286 let dq_compile = idx.starts_with('\u{02}');
2287 if dq_compile {
2288 idx = idx[1..].to_string();
2289 }
2290 // `\u{05}` prefix on idx = "(@) flag is set in surrounding
2291 // flag chain" — emitted by parse_zsh_flag_subscript when the
2292 // outer flag chain contains `@`. Direct port of zsh's
2293 // nojoin behavior: `(@)` overrides the DQ-join even inside
2294 // `"…"`. When this sentinel is present, force array shape
2295 // for slices regardless of in_dq_context.
2296 let force_array = idx.starts_with('\u{05}');
2297 if force_array {
2298 idx = idx[1..].to_string();
2299 }
2300 // `\u{06}` prefix = "outer (v) flag wants values for matching
2301 // assoc keys" — flip the (I)/(i) subscript-flag from
2302 // returning keys to returning the corresponding values.
2303 // Direct port of zsh's (v)+(I) combo.
2304 let flip_to_values = idx.starts_with('\u{06}');
2305 if flip_to_values {
2306 idx = idx[1..].to_string();
2307 }
2308 // `\u{07}` prefix = "outer (k) flag wants keys for matching
2309 // assoc values" — flip the (R)/(r) subscript-flag from
2310 // returning values to returning the corresponding keys.
2311 let flip_to_keys = idx.starts_with('\u{07}');
2312 if flip_to_keys {
2313 idx = idx[1..].to_string();
2314 }
2315 // Pre-expand `$((arith))` / `$VAR` / `$(cmd)` references in
2316 // the subscript text so downstream slice / index logic sees
2317 // numeric literals it can parse. The compile path passes the
2318 // raw subscript text as a constant; without expansion, a key
2319 // like `$((1+1)),-1` failed `parse::<i64>()` for the lower
2320 // bound and the whole slice fell back to scalar concat.
2321 // Special-flag keys `(I)pat` / `(R)pat` skip this — those
2322 // already get their `$VAR` resolution inside the matchers.
2323 if idx.contains('$')
2324 && !idx.starts_with("(I)")
2325 && !idx.starts_with("(i)")
2326 && !idx.starts_with("(R)")
2327 && !idx.starts_with("(r)")
2328 && !idx.starts_with("(K)")
2329 && !idx.starts_with("(k)")
2330 {
2331 idx = crate::ported::subst::singsub(&idx);
2332 }
2333 // `${pipestatus[N]}` / `${PIPESTATUS[N]}` — pipeline exit
2334 // status array. Populated by BUILTIN_PIPELINE_EXEC after a
2335 // real pipeline; for single commands fall back to a synthetic
2336 // [last_status] list so `true; echo $pipestatus[1]` prints 0.
2337 // After a non-pipeline command runs, the prior pipestatus
2338 // array becomes stale (zsh resets pipestatus to a single-
2339 // element array on every command). Detect by comparing the
2340 // last element to last_status; if they diverge, fall back
2341 // to the synthetic [last_status] form so e.g.
2342 // true | false; echo "$?"; echo "$pipestatus"
2343 // prints "0" (just the echo's status), not "0 1".
2344 if name == "pipestatus" || name == "PIPESTATUS" {
2345 let arr = with_executor(|exec| {
2346 let cached = exec.array(&name);
2347 let last = exec.last_status().to_string();
2348 match cached {
2349 Some(arr)
2350 if arr.last().map(|s| s.as_str()) == Some(last.as_str()) =>
2351 {
2352 arr
2353 }
2354 _ => vec![last],
2355 }
2356 });
2357 if let Ok(i) = idx.parse::<i64>() {
2358 let len = arr.len() as i64;
2359 let resolved = if i > 0 {
2360 (i - 1) as usize
2361 } else if i < 0 {
2362 let off = len + i;
2363 if off < 0 {
2364 return Value::str("");
2365 }
2366 off as usize
2367 } else {
2368 return Value::str("");
2369 };
2370 return Value::str(arr.get(resolved).cloned().unwrap_or_default());
2371 }
2372 if idx == "@" || idx == "*" {
2373 return Value::Array(arr.into_iter().map(Value::str).collect());
2374 }
2375 }
2376
2377 // Special-name positional-param indexing. `${@[N]}`, `${@[N,M]}`,
2378 // `${*[N]}`, `${argv[N]}` all index the positional-param array
2379 // 1-based (zsh semantics). Without this, `@`/`*`/`argv` fall
2380 // through to the scalar-slice path which slices the joined
2381 // string instead.
2382 if matches!(name.as_str(), "@" | "*" | "argv") {
2383 let arr = with_executor(|exec| exec.pparams());
2384 // Slice form `N,M`.
2385 if let Some((s_str, e_str)) = idx.split_once(',') {
2386 let s_opt: Option<i64> = s_str.trim().parse().ok();
2387 let e_opt: Option<i64> = e_str.trim().parse().ok();
2388 if let (Some(s), Some(e)) = (s_opt, e_opt) {
2389 return Value::Array(
2390 getarrvalue(&arr, s, e)
2391 .into_iter()
2392 .map(Value::str)
2393 .collect(),
2394 );
2395 }
2396 }
2397 // Single index.
2398 if let Ok(i) = idx.parse::<i64>() {
2399 let len = arr.len() as i64;
2400 let resolved = if i > 0 {
2401 (i - 1) as usize
2402 } else if i < 0 {
2403 let off = len + i;
2404 if off < 0 {
2405 return Value::str("");
2406 }
2407 off as usize
2408 } else {
2409 return Value::str("");
2410 };
2411 return Value::str(arr.get(resolved).cloned().unwrap_or_default());
2412 }
2413 // Subscript-flag form on positional params: route through
2414 // getarg with positional_params as the array. Matches
2415 // zsh's `${@[(I)pat]}` / `${@[(r)pat]}` semantics.
2416 if idx.starts_with('(') {
2417 if let Some(crate::ported::params::getarg_out::Value(v)) =
2418 crate::ported::params::getarg(&idx, Some(&arr), None, None)
2419 {
2420 return v;
2421 }
2422 }
2423 }
2424 // Magic special-parameter assoc lookups — synthesized from shell
2425 // state on access. zsh exposes shell-introspection assocs like
2426 // `${commands[ls]}`, `${aliases[ll]}`, `${functions[foo]}`,
2427 // `${options[interactive]}`, etc. None of these are stored in
2428 // `assoc_arrays`; we generate the value at lookup time.
2429 //
2430 // BUT: if the user declared `typeset -A NAME` and assigned
2431 // values, their declaration wins. This matches zsh's actual
2432 // module behavior (verified against /bin/zsh): `typeset -A
2433 // langinfo; langinfo[CODESET]=UTF-8; echo $langinfo[CODESET]`
2434 // prints `UTF-8` even though `zsh/langinfo` would normally
2435 // shadow it with nl_langinfo(3). The C source enforces this
2436 // via the module loader: `bin_zmodload` only registers the
2437 // special-parameter table entry when no existing assoc with
2438 // that name exists. Mirroring: skip the magic path if
2439 // `name` is already in `assoc_arrays`.
2440 let user_defined_assoc =
2441 with_executor(|exec| exec.assoc(&name).is_some());
2442 if !user_defined_assoc {
2443 if let Some(v) = magic_assoc_lookup(&name, &idx) {
2444 // Magic-assoc with `(I)pat` glob-match returned an
2445 // Array of matching keys. In DQ context (the user
2446 // wrote `"${aliases[(I)foo*]}"`), zsh joins array
2447 // results with the first IFS char per Src/subst.c
2448 // paramsubst's `nojoin` gating. Without this the
2449 // outer DQ-string was treating the array as a
2450 // splice and emitting one arg per matching key.
2451 if dq_compile {
2452 if let Value::Array(items) = &v {
2453 let strs: Vec<String> =
2454 items.iter().map(|i| i.to_str()).collect();
2455 let sep = with_executor(|exec| {
2456 exec.scalar("IFS")
2457 .and_then(|s| s.chars().next())
2458 .unwrap_or(' ')
2459 });
2460 return Value::str(strs.join(&sep.to_string()));
2461 }
2462 }
2463 return v;
2464 }
2465 }
2466 with_executor(|exec| match idx.as_str() {
2467 "@" | "*" => {
2468 // Splice: assoc → values list (zsh's `${foo[@]}` for assoc);
2469 // indexed → element list. For assoc the order of values is
2470 // implementation-defined (matches HashMap iteration).
2471 if let Some(map) = exec.assoc(&name) {
2472 return Value::Array(map.values().map(Value::str).collect());
2473 }
2474 match exec.array(&name) {
2475 Some(v) => Value::Array(v.iter().map(Value::str).collect()),
2476 None => Value::Array(vec![]),
2477 }
2478 }
2479 _ => {
2480 // Magic-assoc lookup (`${aliases[gst]}`,
2481 // `${commands[ls]}`, etc.) — names backed by zsh's
2482 // parameter-module hashes (Src/Modules/parameter.c)
2483 // that don't live in `exec.assoc_arrays`. Direct
2484 // delegation to the canonical port reader.
2485 if crate::exec::scan_magic_assoc_keys(&name).is_some() {
2486 return Value::str(
2487 exec.get_special_array_value(&name, &idx).unwrap_or_default());
2488 }
2489 if let Some(map) = exec.assoc(&name) {
2490 if let Some((flags, pat)) = (|s: &str| -> Option<(String, String)> {
2491 // Port of subst.c subscript-flag parser:
2492 // `(I)pat` / `(R)pat` / `(i)pat` / `(r)pat`.
2493 // Returns (flags_chars, pattern_after).
2494 let s = s.trim_start();
2495 let rest = s.strip_prefix('(')?;
2496 let close = rest.find(')')?;
2497 let flags = rest[..close].to_string();
2498 let pat = rest[close + 1..].to_string();
2499 if flags.chars().next().is_some_and(|c| matches!(c, 'I' | 'R' | 'i' | 'r' | 'k' | 'K' | 'n' | 'e' | 'b' | 'w' | 'f' | 'p' | 's')) {
2500 Some((flags, pat))
2501 } else { None }
2502 })(&idx) {
2503 // (v)+(I)/(i): subscript searches keys but
2504 // outer wants values. Iterate the assoc and
2505 // return values for keys that match `pat`.
2506 if flip_to_values
2507 && (flags.contains('I') || flags.contains('i'))
2508 {
2509 let return_all = flags.contains('I');
2510 let mut out: Vec<String> = Vec::new();
2511 for (k, v) in map.iter() {
2512 if crate::exec::glob_match_static(k, &pat) {
2513 out.push(v.clone());
2514 if !return_all {
2515 break;
2516 }
2517 }
2518 }
2519 return Value::str(out.join(" "));
2520 }
2521 // (k)+(R)/(r): subscript searches values but
2522 // outer wants keys. Iterate the assoc and
2523 // return keys whose values match.
2524 if flip_to_keys
2525 && (flags.contains('R') || flags.contains('r'))
2526 {
2527 let return_all = flags.contains('R');
2528 let mut out: Vec<String> = Vec::new();
2529 for (k, v) in map.iter() {
2530 if crate::exec::glob_match_static(v, &pat) {
2531 out.push(k.clone());
2532 if !return_all {
2533 break;
2534 }
2535 }
2536 }
2537 return Value::str(out.join(" "));
2538 }
2539 // Default flag handling — route to getarg's
2540 // hash-search arm (params.c:1581-1660).
2541 match crate::ported::params::getarg(&idx, None, Some(&map), None) {
2542 Some(crate::ported::params::getarg_out::Value(v)) => return v,
2543 _ => {}
2544 }
2545 }
2546 return Value::str(map.get(&idx).cloned().unwrap_or_default());
2547 }
2548
2549 let arr = match exec.array(&name) {
2550 Some(a) => a,
2551 None => {
2552 // Fall back to scalar subscripting on `variables`.
2553 // zsh treats `${str[N]}` and `${str[N,M]}` as
2554 // 1-based char indexing. Subscript flags
2555 // `(w)`/`(s/sep/)` on scalars split before
2556 // indexing — direct port of zsh's
2557 // zshparam(1) "Subscript Flags" `w` and `s`.
2558 let scalar = exec.get_variable(&name);
2559 if scalar.is_empty() {
2560 return Value::str("");
2561 }
2562 // `(w)N` on scalar: split by IFS into words,
2563 // return the Nth (1-based). zsh's word
2564 // separator defaults to IFS whitespace.
2565 // `(s/sep/)` overrides the separator. zsh
2566 // also accepts `(ws[chars])` — `s` followed
2567 // by a `[chars]` set treated as IFS for this
2568 // operation.
2569 if let Some((flags, pat)) = (|s: &str| -> Option<(String, String)> {
2570 // Port of subst.c subscript-flag parser:
2571 // `(I)pat` / `(R)pat` / `(i)pat` / `(r)pat`.
2572 // Special-case `(s<delim>...<delim>)` per
2573 // params.c:1458-1476 — `s` introduces a
2574 // delimited separator block.
2575 // Returns (flags_chars, pattern_after).
2576 let s = s.trim_start();
2577 let rest = s.strip_prefix('(')?;
2578 let close = rest.find(')')?;
2579 let flags = rest[..close].to_string();
2580 let pat = rest[close + 1..].to_string();
2581 if flags.starts_with('s') {
2582 return Some((flags, pat));
2583 }
2584 if flags.chars().next().is_some_and(|c| matches!(c, 'I' | 'R' | 'i' | 'r' | 'k' | 'K' | 'n' | 'e' | 'b' | 'w' | 'f' | 'p' | 's')) {
2585 Some((flags, pat))
2586 } else { None }
2587 })(&idx) {
2588 if flags.contains('w') {
2589 if let Ok(n) = pat.parse::<i64>() {
2590 let words: Vec<&str> = scalar.split_whitespace().collect();
2591 let len = words.len() as i64;
2592 let i = if n > 0 {
2593 (n - 1) as usize
2594 } else if n < 0 {
2595 let off = len + n;
2596 if off < 0 {
2597 return Value::str("");
2598 }
2599 off as usize
2600 } else {
2601 return Value::str("");
2602 };
2603 return Value::str(
2604 words.get(i).map(|s| s.to_string()).unwrap_or_default(),
2605 );
2606 }
2607 }
2608 // `(s/sep/)N` is a NO-OP for scalar `[N]`
2609 // indexing — confirmed by testing zsh
2610 // (`a=hello; ${a[(s/l/)1]}` returns "h",
2611 // same as `${a[1]}`). The `(s)` flag
2612 // only affects splitting in word-list
2613 // contexts (`${(s/sep/)var}` without
2614 // index, or `[@]` form). Strip the
2615 // flag, parse the index normally, fall
2616 // through to char slicing.
2617 if flags.starts_with('s') {
2618 if let Ok(i) = pat.parse::<i64>() {
2619 let s_chars: Vec<String> = scalar.chars().map(|c| c.to_string()).collect();
2620 return Value::str(crate::ported::params::getarrvalue(&s_chars, i, i).concat());
2621 }
2622 }
2623 // (i)/(I)/(r)/(R) on scalar — route
2624 // through getarg's scalar char-search
2625 // arm (params.c:1798-1980). Faithful
2626 // port lives in src/ported/params.rs;
2627 // this branch defers to it to avoid
2628 // duplicated drift.
2629 if flags.chars().all(|c| matches!(c, 'i' | 'I' | 'r' | 'R' | 'e')) {
2630 let _ = &pat;
2631 if let Some(crate::ported::params::getarg_out::Value(v)) =
2632 crate::ported::params::getarg(&idx, None, None, Some(&scalar))
2633 {
2634 return v;
2635 }
2636 }
2637 }
2638 // Build a per-char pseudo-array and route slice/index
2639 // through getarrvalue so 1-based inclusive semantics
2640 // and negative-from-end indexing match
2641 // Src/params.c::getstrvalue's char-arm.
2642 let s_chars: Vec<String> = scalar.chars().map(|c| c.to_string()).collect();
2643 if let Some((start_s, end_s)) = idx.split_once(',') {
2644 let parse_one = |s: &str, exec: &mut ShellExecutor| -> Option<i64> {
2645 let t = s.trim();
2646 if t.is_empty() { return None; }
2647 if let Ok(i) = t.parse::<i64>() { return Some(i); }
2648 Some(crate::ported::math::mathevali(&crate::ported::subst::singsub(t)).unwrap_or(0))
2649 };
2650 let s_opt = parse_one(start_s, exec);
2651 let e_opt = parse_one(end_s, exec);
2652 let s_i = s_opt.unwrap_or(1);
2653 let e_i = e_opt.unwrap_or(s_chars.len() as i64);
2654 return Value::str(crate::ported::params::getarrvalue(&s_chars, s_i, e_i).concat());
2655 }
2656 let i = match idx.parse::<i64>() {
2657 Ok(i) => i,
2658 Err(_) => crate::ported::math::mathevali(&crate::ported::subst::singsub(&idx)).unwrap_or(0),
2659 };
2660 return Value::str(crate::ported::params::getarrvalue(&s_chars, i, i).concat());
2661 }
2662 };
2663
2664 // Subscript flag form: (r)pat / (R)pat / (i)pat / (I)pat
2665 // / (e)str / (n:N:)pat. Returns first/last matching value
2666 // or first/last matching index per zsh semantics.
2667 if let Some((flags, pat)) = (|s: &str| -> Option<(String, String)> {
2668 let s = s.trim_start();
2669 let rest = s.strip_prefix('(')?;
2670 let close = rest.find(')')?;
2671 let flags = rest[..close].to_string();
2672 let pat = rest[close + 1..].to_string();
2673 if flags.chars().next().is_some_and(|c| matches!(c, 'I' | 'R' | 'i' | 'r' | 'k' | 'K' | 'n' | 'e' | 'b' | 'w' | 'f' | 'p' | 's')) {
2674 Some((flags, pat))
2675 } else { None }
2676 })(&idx) {
2677 // Route to getarg's array-search arm
2678 // (params.c:1672-1719).
2679 let _ = (&flags, &pat); // silence unused if any
2680 match crate::ported::params::getarg(&idx, Some(&arr), None, None) {
2681 Some(crate::ported::params::getarg_out::Value(v)) => return v,
2682 _ => {}
2683 }
2684 return Value::str("");
2685 }
2686
2687 // Slice form `N,M`: comma separator with int-or-arith
2688 // operands on each side. Negative indices count from
2689 // end. Direct port of zsh's getindex() N,M slice.
2690 //
2691 // Return shape depends on context: in DQ (`"${arr[2,4]}"`)
2692 // zsh joins the slice with the first IFS char into a
2693 // single scalar (Src/subst.c sepjoin path with nojoin=0);
2694 // in unquoted (`${arr[2,4]}`) or `[@]`-style context it
2695 // remains an array. Detect via in_dq_context which the
2696 // BUILTIN_EXPAND_TEXT mode-1 wrapper bumps.
2697 if let Some((start_s, end_s)) = idx.split_once(',') {
2698 // Inline subscript-int parse — mirrors getarg's
2699 // mathevalarg fallback (params.c:1567).
2700 let parse_one = |s: &str, exec: &mut ShellExecutor| -> Option<i64> {
2701 let t = s.trim();
2702 if t.is_empty() { return None; }
2703 if let Ok(i) = t.parse::<i64>() { return Some(i); }
2704 Some(crate::ported::math::mathevali(&crate::ported::subst::singsub(t)).unwrap_or(0))
2705 };
2706 let start = parse_one(start_s, exec);
2707 let end = parse_one(end_s, exec);
2708 if let (Some(s), Some(e)) = (start, end) {
2709 // KSH_ARRAYS: indices are 0-based, so shift
2710 // positive values up by 1 before the (1-based)
2711 // slicer runs. zsh: `setopt ksh_arrays;
2712 // a=(a b c d); echo $a[1,2]` → `b c`.
2713 let ksh = crate::ported::options::opt_state_get("ksharrays").unwrap_or(false);
2714 let s = if ksh && s >= 0 { s + 1 } else { s };
2715 let e = if ksh && e >= 0 { e + 1 } else { e };
2716 let sliced = getarrvalue(&arr, s, e);
2717 // (@) flag in surrounding chain overrides DQ-join
2718 // — always splat to Value::Array so the caller's
2719 // (@)-aware splat path emits each element as its
2720 // own word.
2721 if !force_array && (exec.in_dq_context > 0 || dq_compile) {
2722 let ifs_first = exec
2723 .get_variable("IFS")
2724 .chars()
2725 .next()
2726 .unwrap_or(' ')
2727 .to_string();
2728 return Value::str(sliced.join(&ifs_first));
2729 }
2730 return Value::Array(
2731 sliced.into_iter().map(Value::str).collect(),
2732 );
2733 }
2734 }
2735
2736 // Single index — try literal int first (fast), then fall
2737 // back to arithmetic eval which handles bare variable
2738 // names (`arr[i]`), expressions (`arr[i+1]`), etc.
2739 // KSH_ARRAYS: 0-based, so a 0 means first element and
2740 // valid indices are 0..len-1. Without this, `setopt
2741 // ksh_arrays; a[0]` returned empty (treating 0 as
2742 // "before first" per the standard 1-based path).
2743 let i = match idx.parse::<i64>() {
2744 Ok(i) => i,
2745 Err(_) => crate::ported::math::mathevali(&crate::ported::subst::singsub(&idx)).unwrap_or(0),
2746 };
2747 let len = arr.len() as i64;
2748 let ksh = crate::ported::options::opt_state_get("ksharrays").unwrap_or(false);
2749 let resolved = if ksh {
2750 if i < 0 {
2751 let off = len + i;
2752 if off < 0 {
2753 return Value::str("");
2754 }
2755 off as usize
2756 } else if i >= len {
2757 return Value::str("");
2758 } else {
2759 i as usize
2760 }
2761 } else if i > 0 {
2762 (i - 1) as usize
2763 } else if i < 0 {
2764 let off = len + i;
2765 if off < 0 {
2766 return Value::str("");
2767 }
2768 off as usize
2769 } else {
2770 return Value::str("");
2771 };
2772 Value::str(arr.get(resolved).cloned().unwrap_or_default())
2773 }
2774 })
2775 });
2776
2777 // `${(flags)name}` — apply zsh parameter flags. See BUILTIN_PARAM_FLAG
2778 // doc comment for the supported flag set. Algorithm: load `name` as a
2779 // current-value (scalar from variables/env, array from arrays, or assoc
2780 // from assoc_arrays), then walk `flags` char-by-char applying each
2781 // transformation. Final state is either Value::str or Value::Array
2782 // depending on the last flag.
2783 // Bridge entry that preserves array shape — see the const's doc.
2784 // Pops [content] (the brace body without the outer ${...}) and
2785 // returns Value::Array of per-element words.
2786 //WARNING FAKE AND MUST BE DELETED
2787 vm.register_builtin(BUILTIN_BRIDGE_BRACE_ARRAY, |vm, _argc| {
2788 // Inner body of `${(...)...}` (already stripped of `${`/`}` by
2789 // the caller). Re-wrap and route through subst.rs's paramsubst
2790 // so the flag-loop + per-operator array semantics
2791 // (e.g. `(M)arr:#pat`) execute properly. Earlier this returned
2792 // the body verbatim, which is why `${(M)arr:#pat}` printed as
2793 // literal text.
2794 let body = vm.pop().to_str();
2795 let full = format!("${{{}}}", body);
2796 let result = with_executor(|exec| {
2797 let mut ret_flags: i32 = 0;
2798 let (_full_str, _new_pos, nodes) = crate::ported::subst::paramsubst(
2799 &full,
2800 0,
2801 false,
2802 0i32,
2803 &mut ret_flags,
2804 );
2805 // c:Src/subst.c errflag bail — propagate to caller's
2806 // exit status the way `subst_state_commit_to_executor`
2807 // used to.
2808 if crate::ported::utils::errflag.load(std::sync::atomic::Ordering::Relaxed) != 0 {
2809 exec.set_last_status(1);
2810 }
2811 nodes
2812 });
2813 if result.is_empty() {
2814 return fusevm::Value::Array(Vec::new());
2815 }
2816 if result.len() == 1 {
2817 return fusevm::Value::str(result.into_iter().next().unwrap());
2818 }
2819 fusevm::Value::Array(result.into_iter().map(fusevm::Value::str).collect())
2820 });
2821
2822 vm.register_builtin(BUILTIN_PARAM_FLAG, |vm, _argc| {
2823 let mut flags = vm.pop().to_str();
2824 let name = vm.pop().to_str();
2825
2826 // Compile path tags DQ-wrapped expressions with a leading
2827 // `\u{02}` sentinel. In DQ context, array-only flags are
2828 // no-ops per zsh: `(o)`/`(O)`/`(n)`/`(i)`/`(M)`/`(u)` only
2829 // fire in array context. Strip those flag chars before
2830 // processing so the join-as-scalar path returns the original
2831 // element order.
2832 let dq_compile = flags.starts_with('\u{02}');
2833 if dq_compile {
2834 flags = flags[1..].to_string();
2835 }
2836 // `\u{03}` sentinel = the original name had `[@]`/`[*]` suffix.
2837 // The compile path strips the suffix from name (fast-path
2838 // requires identifier-only), but encodes the splice context
2839 // through this sentinel so DQ flag-stripping still respects it.
2840 let had_at_subscript = flags.starts_with('\u{03}');
2841 if had_at_subscript {
2842 flags = flags[1..].to_string();
2843 }
2844 // `\u{04}` sentinel = scalar-assignment context (compile-time
2845 // detected via `scalar_assign_depth`). Direct port of zsh's
2846 // PREFORK_SINGLE bit (Src/exec.c::addvars line 2546). Strip
2847 // the sentinel and remember it for the split-flag gate
2848 // below.
2849 let ssub_compile = flags.starts_with('\u{04}');
2850 if ssub_compile {
2851 flags = flags[1..].to_string();
2852 }
2853 let dq_runtime = with_executor(|exec| exec.in_dq_context > 0);
2854 // PREFORK_SINGLE equivalent — set when the BUILTIN_PARAM_FLAG
2855 // is being evaluated as the RHS of a scalar assignment.
2856 // Direct port of Src/subst.c:1759 `int ssub = (pf_flags &
2857 // PREFORK_SINGLE)`. Per Src/subst.c:3902 `force_split = !ssub
2858 // && (spbreak || spsep)` — when ssub, the force-split path
2859 // is gated off, so split flags `(f)` / `(s:STR:)` / `(0)` /
2860 // `(z)` produce the original scalar verbatim. Consulted at
2861 // each split flag's effect site below (the flag char itself
2862 // is not removed; instead the split is skipped).
2863 let ssub_runtime = ssub_compile
2864 || with_executor(|exec| exec.in_scalar_assign > 0);
2865 // `[@]` / `[*]` subscript on the name overrides the DQ
2866 // strip — explicit `[@]` marks the array as splice-
2867 // expanded so array-only flags (`o`/`O`/`n`/`i`/`u`)
2868 // still fire on the per-element list. Direct port of
2869 // zsh's subst.c nojoin/spbreak path. Without this,
2870 // `"${(o)a[@]}"` skipped the sort in DQ.
2871 // The explicit `@` flag is also an array-context marker — zsh
2872 // treats `${(@o)a}` same as `${(o)a[@]}` (both keep array-only
2873 // sort flags active in DQ). Without checking flags too, the DQ
2874 // strip dropped `o` for the bare-name `(@o)` case.
2875 let has_at_subscript = had_at_subscript
2876 || name.ends_with("[@]")
2877 || name.ends_with("[*]")
2878 || flags.contains('@');
2879 if (dq_compile || dq_runtime) && !has_at_subscript {
2880 // Strip array-only flag CHARS (sort/unique/index variants)
2881 // from the flag chain — but only when they appear as
2882 // bare flag chars, not as part of a flag-arg like
2883 // `(r:NAME::pad:)` where NAME may contain `n`/`o`/etc.
2884 // Direct port of zsh's nojoin gating in Src/subst.c:1813
2885 // which gates these flags off in DQ context. The C source
2886 // walks the flag chain as a state machine; we mirror that
2887 // by tracking arg-region depth: when we hit `(j:`, `(s:`,
2888 // `(l:`, `(r:` etc., switch into "in-arg" mode and copy
2889 // chars verbatim until the closing delim. Without this
2890 // careful skip, `(r:hlen:: :)` lost the `n` inside the
2891 // identifier, so width parsing returned a truncated name.
2892 let bytes = flags.as_bytes();
2893 let mut out = String::with_capacity(bytes.len());
2894 let mut i = 0;
2895 while i < bytes.len() {
2896 let b = bytes[i] as char;
2897 // Flag chars that take a delimited argument:
2898 // `j:STR:` join, `s:STR:` split, `l:N::pad:`,
2899 // `r:N::pad:`, `Z:STR:`, `g:STR:`. The arg is
2900 // bracket-delimited by the next char.
2901 if matches!(b, 'j' | 's' | 'l' | 'r' | 'Z' | 'g')
2902 && i + 1 < bytes.len()
2903 && !(bytes[i + 1] as char).is_ascii_alphanumeric()
2904 && bytes[i + 1] != b'_'
2905 {
2906 let delim_open = bytes[i + 1] as char;
2907 let delim_close = match delim_open {
2908 '[' => ']',
2909 '{' => '}',
2910 '(' => ')',
2911 '<' => '>',
2912 c => c,
2913 };
2914 out.push(b);
2915 out.push(delim_open);
2916 i += 2;
2917 // For `l:N::pad:` and `r:N::pad:`, the format has
2918 // TWO arg sections: `:N:` then `:pad:`. Walk
2919 // through both, plus any further sections until
2920 // we run out of immediate-`delim_close+delim_open`
2921 // pairs. This matches zsh subst.c get_strarg
2922 // which is called in a loop.
2923 loop {
2924 while i < bytes.len() && bytes[i] as char != delim_close {
2925 out.push(bytes[i] as char);
2926 i += 1;
2927 }
2928 if i < bytes.len() {
2929 out.push(delim_close);
2930 i += 1;
2931 }
2932 // Continue if the next char is the same
2933 // open-delim (another arg section).
2934 if i < bytes.len() && bytes[i] as char == delim_open {
2935 out.push(delim_open);
2936 i += 1;
2937 continue;
2938 }
2939 break;
2940 }
2941 continue;
2942 }
2943 if matches!(b, 'o' | 'O' | 'n' | 'i' | 'u') {
2944 i += 1;
2945 continue;
2946 }
2947 out.push(b);
2948 i += 1;
2949 }
2950 flags = out;
2951 }
2952
2953 // Initial state: prefer assoc → array → scalar lookup. If `P` flag
2954 // is in the chain, we'll re-fetch with the indirected name later.
2955 enum St {
2956 S(String),
2957 A(Vec<String>),
2958 }
2959
2960 // Detect (k) flag PRESENCE early — we need to seed
2961 // magic-assoc lookups with the key set before the flag
2962 // walker re-orders things. Use `flags` (the post-sentinel-
2963 // strip string) since the `chars` Vec is built later.
2964 let want_keys = flags.contains('k');
2965 let want_values = flags.contains('v');
2966
2967 // Literal-string operand sentinel: `${(flags)"text"}` compiles to a
2968 // name prefixed with `\u{01}` followed by the literal value. Skip
2969 // the lookup and seed state with the literal scalar.
2970 let mut state = if let Some(literal) = name.strip_prefix('\u{01}') {
2971 St::S(literal.to_string())
2972 } else {
2973 with_executor(|exec| {
2974 if let Some(map) = exec.assoc(&name) {
2975 // For assoc, default to value list (no flag) — `(k)`/`(v)`
2976 // override.
2977 St::A(map.values().cloned().collect())
2978 } else if let Some(arr) = exec.array(&name) {
2979 St::A(arr)
2980 } else if want_keys {
2981 // `${(k)<magic-assoc>}` — names like `aliases`,
2982 // `functions`, `options`, `commands`, `terminfo`,
2983 // `errnos` etc. are not in `assoc_arrays` (they're
2984 // synthesized via magic-getfn). When the flag set
2985 // includes `k`, return the SCANFN-equivalent key
2986 // list. Direct port of paramsubst's per-special
2987 // scanfn dispatch (Src/Modules/parameter.c +
2988 // system.c + terminfo.c et al.).
2989 if let Some(keys) =
2990 crate::exec::scan_magic_assoc_keys(&name)
2991 {
2992 St::A(keys)
2993 } else {
2994 St::S(exec.get_variable(&name))
2995 }
2996 } else if want_values {
2997 // `${(v)<magic-assoc>}` — values for the same
2998 // magic-getfn list above. zinit/p10k both use
2999 // `${(v)aliases}`-style introspection; the
3000 // earlier (k) branch covered the keys but the
3001 // (v) symmetry was missing, so plugin code that
3002 // looped over alias bodies got an empty list.
3003 if let Some(keys) =
3004 crate::exec::scan_magic_assoc_keys(&name)
3005 {
3006 let values: Vec<String> = keys
3007 .iter()
3008 .map(|k| exec.get_special_array_value(&name, k).unwrap_or_default())
3009 .collect();
3010 St::A(values)
3011 } else {
3012 St::S(exec.get_variable(&name))
3013 }
3014 } else {
3015 St::S(exec.get_variable(&name))
3016 }
3017 })
3018 };
3019
3020 let chars: Vec<char> = flags.chars().collect();
3021 // Pre-scan for `(P)` — indirect: zsh's bin_zmodload-style
3022 // P flag is special. It applies BEFORE all per-char
3023 // transforms regardless of position in the flag string,
3024 // because zsh's paramsubst sets `aspar` early and the
3025 // INITIAL value is the indirected lookup. Without this
3026 // pre-resolve, `${(UP)ref}` first uppercases ref's value
3027 // ("target" → "TARGET") then tries to indirect on "TARGET"
3028 // which is unset, returning empty. zsh produces "HELLO"
3029 // because it indirects FIRST (ref→target, lookup target =
3030 // "hello") then uppercases.
3031 let want_indirect = chars.iter().any(|&c| c == 'P');
3032 // `(Pt)` is a special pairing — type-of-the-target, not
3033 // value-of-the-target. Direct port of Src/subst.c:2807-2854
3034 // `wantt` arm: zsh's `wantt` runs AFTER `aspar` has resolved
3035 // the pm pointer to the target's Param struct, then reads
3036 // `pm->node.flags` for type. Doing the value pre-walker here
3037 // discards the target name and the (t) handler ends up
3038 // introspecting the original pointer ("n" → scalar). Skip
3039 // the value-walker for (Pt); the (t) handler resolves the
3040 // target name itself via `target_for_type` below.
3041 let want_type = chars.iter().any(|&c| c == 't');
3042 let pt_combo = want_indirect && want_type;
3043 if want_indirect && !pt_combo && !matches!(state, St::S(ref s) if s.is_empty()) {
3044 // The state at this point holds the (P) TARGET reference,
3045 // not the original pointer name — the param-flag dispatch
3046 // upstream initialized state to `exec.get_variable(name)`.
3047 // Resolve that target. Two shapes:
3048 // - bare name: `${(P)n}` with `n=foo` → state="foo",
3049 // look up `foo` directly.
3050 // - subscripted name: `${(P)n2}` with `n2="arr[-1]"` →
3051 // state="arr[-1]", split into base="arr" + sub="-1"
3052 // and route through expand_string. Direct port of
3053 // Src/subst.c:2799-2806 where `fetchvalue(&vbuf, &ov, …)`
3054 // parses both name and any trailing `[…]` subscript
3055 // from the same input pointer. Without this split,
3056 // a subscripted target was looked up as a literal
3057 // parameter named "arr[-1]" (always unset → empty).
3058 fn resolve_indirect_target(target: &str, exec: &mut ShellExecutor) -> St {
3059 let (base, sub) = match target.find('[') {
3060 Some(b) if target.ends_with(']') => {
3061 let n = &target[..b];
3062 let s = &target[b + 1..target.len() - 1];
3063 (n.to_string(), Some(s.to_string()))
3064 }
3065 _ => (target.to_string(), None),
3066 };
3067 // Bare-name path.
3068 if sub.is_none() {
3069 if let Some(arr) = exec.array(&base) {
3070 return St::A(arr);
3071 }
3072 return St::S(exec.get_variable(&base));
3073 }
3074 let sub_str = sub.unwrap();
3075 // Assoc lookup: `${(P)"map[key]"}` — single value for
3076 // the given key.
3077 if let Some(m) = exec.assoc(&base) {
3078 return St::S(m.get(&sub_str).cloned().unwrap_or_default());
3079 }
3080 // Indexed-array subscript. Direct port of getindex()
3081 // (Src/params.c) handling for negative indices and
3082 // `lo,hi` slice. expand_string() can't be used here —
3083 // it routes the subscripted form through compile-time
3084 // paths that re-fetch the WHOLE array on the bridge
3085 // back from subst_port. Apply the subscript here
3086 // directly.
3087 if let Some(arr) = exec.array(&base) {
3088 let n = arr.len() as i64;
3089 let to_zero = |i: i64| -> i64 {
3090 if i > 0 {
3091 i - 1
3092 } else if i < 0 {
3093 n + i
3094 } else {
3095 0
3096 }
3097 };
3098 if let Some((lo_s, hi_s)) = sub_str.split_once(',') {
3099 let lo = lo_s.trim().parse::<i64>().unwrap_or(1);
3100 let hi = hi_s.trim().parse::<i64>().unwrap_or(n);
3101 let lo_i = to_zero(lo).max(0);
3102 let hi_i = to_zero(hi);
3103 if hi_i < lo_i || lo_i >= n {
3104 return St::A(Vec::new());
3105 }
3106 let hi_clamped = (hi_i + 1).min(n) as usize;
3107 return St::A(arr[lo_i as usize..hi_clamped].to_vec());
3108 }
3109 if sub_str == "@" || sub_str == "*" {
3110 return St::A(arr);
3111 }
3112 if let Ok(idx) = sub_str.parse::<i64>() {
3113 let real = to_zero(idx);
3114 if real < 0 || real >= n {
3115 return St::S(String::new());
3116 }
3117 return St::S(arr[real as usize].clone());
3118 }
3119 }
3120 // Fallback: scalar with subscript = char-range.
3121 let val = exec.get_variable(&base);
3122 let chars: Vec<char> = val.chars().collect();
3123 let n = chars.len() as i64;
3124 let to_zero = |i: i64| -> i64 {
3125 if i > 0 {
3126 i - 1
3127 } else if i < 0 {
3128 n + i
3129 } else {
3130 0
3131 }
3132 };
3133 if let Some((lo_s, hi_s)) = sub_str.split_once(',') {
3134 let lo = lo_s.trim().parse::<i64>().unwrap_or(1);
3135 let hi = hi_s.trim().parse::<i64>().unwrap_or(n);
3136 let lo_i = to_zero(lo).max(0);
3137 let hi_i = to_zero(hi);
3138 if hi_i < lo_i || lo_i >= n {
3139 return St::S(String::new());
3140 }
3141 let hi_clamped = (hi_i + 1).min(n) as usize;
3142 return St::S(chars[lo_i as usize..hi_clamped].iter().collect());
3143 }
3144 if let Ok(idx) = sub_str.parse::<i64>() {
3145 let real = to_zero(idx);
3146 if real < 0 || real >= n {
3147 return St::S(String::new());
3148 }
3149 return St::S(chars[real as usize].to_string());
3150 }
3151 St::S(String::new())
3152 }
3153 state = match state {
3154 St::S(name) => with_executor(|exec| resolve_indirect_target(&name, exec)),
3155 St::A(names) => with_executor(|exec| {
3156 let resolved: Vec<String> = names
3157 .into_iter()
3158 .map(|n| exec.get_variable(&n))
3159 .collect();
3160 St::A(resolved)
3161 }),
3162 };
3163 }
3164 // Pre-scan for `(p)` — print-style escape interpretation for
3165 // any subsequent `(s::)`, `(j::)`, `(l::)`, `(r::)` argument
3166 // strings. Direct port of src/zsh/Src/subst.c:2381-2382 which
3167 // sets `escapes = 1` and then `untok_and_escape` performs the
3168 // print-escape on those flag args. Order in zsh: only flags
// that appear AFTER `p` get their args escaped; we approximate
// by detecting a `p` anywhere before the first `s`/`j`/`l`/`r`
// character in the flag string. The exact C semantics rely on
// left-to-right state, but `(ps:..:)` is by far the dominant
// idiom and this pre-scan is the simplest close match.
3174 let print_escapes = chars
3175 .iter()
3176 .take_while(|&&c| c != 's' && c != 'j' && c != 'l' && c != 'r')
3177 .any(|&c| c == 'p');
// print_escape_str — decode print-style backslash escapes in a flag
// argument and return the decoded string. Used below when
// print_escapes is set.
//
// Recognised escapes:
//   \n \t \r \a \b \f \v \e \E   control characters
//   \\ \' \"                     the quoted character itself
//   \xH, \xHH                    one or two hex digits
//   \N, \NN, \NNN                one to three octal digits (so `\0`
//                                is NUL and `\012` is newline)
// Any other escaped character, including `\8` and `\9`, is kept
// verbatim together with its backslash, as is a trailing lone
// backslash. `\x` with no hex digit after it produces no output.
fn print_escape_str(s: &str) -> String {
    // Consume up to `max` further digits of `radix` from `it`, folding
    // them onto `seed`. Returns the value and the number of digits read.
    fn read_digits(
        it: &mut std::iter::Peekable<std::str::Chars<'_>>,
        radix: u32,
        max: usize,
        seed: u32,
    ) -> (u32, usize) {
        let mut value = seed;
        let mut taken = 0;
        while taken < max {
            let Some(digit) = it.peek().and_then(|c| c.to_digit(radix)) else {
                break;
            };
            value = value * radix + digit;
            it.next();
            taken += 1;
        }
        (value, taken)
    }

    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            Some('\\') => out.push('\\'),
            Some('\'') => out.push('\''),
            Some('"') => out.push('"'),
            Some('a') => out.push('\x07'),
            Some('b') => out.push('\x08'),
            Some('e') | Some('E') => out.push('\x1b'),
            Some('f') => out.push('\x0c'),
            Some('v') => out.push('\x0b'),
            Some('x') => {
                let (code, taken) = read_digits(&mut chars, 16, 2, 0);
                if taken > 0 {
                    if let Some(decoded) = char::from_u32(code) {
                        out.push(decoded);
                    }
                }
            }
            // Octal: the leading digit must itself be octal. There is
            // deliberately no separate `\0` arm, so `\012` and `\033`
            // are read as full three-digit codes.
            Some(lead @ '0'..='7') => {
                let seed = lead.to_digit(8).unwrap_or(0);
                let (code, _) = read_digits(&mut chars, 8, 2, seed);
                if let Some(decoded) = char::from_u32(code) {
                    out.push(decoded);
                }
            }
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            None => out.push('\\'),
        }
    }
    out
}
3245 let mut i = 0;
3246 while i < chars.len() {
3247 let c = chars[i];
3248 i += 1;
3249 match c {
3250 '#' => {
3251 // `(#)` — evaluate each element as an arithmetic
3252 // expression, then output the character with that
3253 // code point. Direct port of substevalchar in
3254 // src/zsh/Src/subst.c:1490-1520. zsh's flow:
3255 // ires = mathevali(ptr); // line 1497
3256 // if (errflag) return ""; // 1499-1502
3257 // if (ires < 0) zerr("character not in range"); // 1504-1506
3258 // if MULTIBYTE && ires>127: ucs4tomb // 1508-1511
3259 // else: single-byte sprintf // 1514-1518
3260 let to_char = |s: &str| -> String {
3261 let n = with_executor(|exec| crate::ported::math::mathevali(&crate::ported::subst::singsub(s)).unwrap_or(0));
3262 // zsh subst.c:1504-1518 — negative WARNS but
3263 // STILL outputs the low byte (truncated cast
3264 // through `(int)ires` + `%c` sprintf at line
3265 // 1514-1517). The zerr at line 1505 just sets
3266 // errflag without aborting the function. We
3267 // skip the error message (matches zsh's
3268 // observed silent behavior under -f -c) and
3269 // mirror the low-byte fallback.
3270 if !(0..=0x10FFFF).contains(&n) {
3271 // Truncated cast: low 8 bits as Latin-1
3272 // byte (zsh's `%c` sprintf on `(int)ires`).
3273 let byte = (n as i32 as u32) & 0xFF;
3274 // Encode the byte as raw — for high bytes
3275 // (0x80-0xFF), wrap with the same UTF-8
3276 // promotion zsh's pastebuf() uses.
3277 return char::from_u32(byte)
3278 .map(|c| c.to_string())
3279 .unwrap_or_default();
3280 }
3281 // Valid Unicode scalar — char::from_u32 returns
3282 // the right multi-byte UTF-8 sequence in Rust.
3283 char::from_u32(n as u32)
3284 .map(|c| c.to_string())
3285 .unwrap_or_default()
3286 };
3287 state = match state {
3288 St::S(s) => St::S(to_char(&s)),
3289 St::A(a) => St::A(a.into_iter().map(|s| to_char(&s)).collect()),
3290 };
3291 }
3292 'L' => {
3293 state = match state {
3294 St::S(s) => St::S(s.to_lowercase()),
3295 St::A(a) => St::A(a.into_iter().map(|s| s.to_lowercase()).collect()),
3296 };
3297 }
3298 'U' => {
3299 state = match state {
3300 St::S(s) => St::S(s.to_uppercase()),
3301 St::A(a) => St::A(a.into_iter().map(|s| s.to_uppercase()).collect()),
3302 };
3303 }
3304 'l' | 'r' => {
3305 // (l:N:) — left-pad to width N (truncate if longer).
3306 // (l:N::fill:) — pad with `fill` instead of space.
3307 // (r:N:) — right-pad to width N (truncate if longer).
3308 // Width must be followed by `:` (or `(` etc.) delim.
3309 let pad_left = c == 'l';
3310 if i >= chars.len() || !ZshrsHost::is_zsh_flag_delim(chars[i]) {
3311 // Bare `l`/`r` without delim — skip (only the
3312 // padded form takes a width).
3313 continue;
3314 }
3315 let delim = chars[i];
3316 i += 1;
3317 let mut width_str = String::new();
3318 while i < chars.len() && chars[i] != delim {
3319 width_str.push(chars[i]);
3320 i += 1;
3321 }
3322 if i < chars.len() {
3323 i += 1; // skip closing delim
3324 }
3325 // Width may be a literal number, `$VAR`, or a bare
3326 // identifier (zsh evaluates `(r:hlen:: :)` by
3327 // running `mathevali("hlen")` which reads the
3328 // parameter table). Direct port of Src/subst.c
3329 // `get_intarg()` (line 1428) which does
3330 // `parsestr` → `singsub` → `mathevali`. Fast path:
3331 // if the arg parses as a literal usize, use it
3332 // directly. Otherwise expand `$`-references and
3333 // route through evaluate_arithmetic so bare
3334 // identifiers resolve to their variable values.
3335 let width: usize = if let Ok(n) = width_str.parse() {
3336 n
3337 } else {
3338 let arith_str = crate::ported::subst::arithsubst(&width_str, "", "");
3339 arith_str.parse::<i64>().map(|v| v.unsigned_abs() as usize).unwrap_or(0)
3340 };
3341 // Optional `:fill:` after the width.
3342 let mut fill = String::from(" ");
3343 if i < chars.len() && ZshrsHost::is_zsh_flag_delim(chars[i]) {
3344 let d2 = chars[i];
3345 i += 1;
3346 let mut f = String::new();
3347 while i < chars.len() && chars[i] != d2 {
3348 f.push(chars[i]);
3349 i += 1;
3350 }
3351 if i < chars.len() {
3352 i += 1; // skip closing delim
3353 }
3354 if !f.is_empty() {
3355 fill = if print_escapes {
3356 print_escape_str(&f)
3357 } else {
3358 f
3359 };
3360 }
3361 }
3362 let pad_one = |s: String| -> String {
3363 let len = s.chars().count();
3364 if len >= width {
3365 return s.chars().take(width).collect();
3366 }
3367 let need = width - len;
3368 let mut filler = String::new();
3369 while filler.chars().count() < need {
3370 filler.push_str(&fill);
3371 }
3372 let filler: String = filler.chars().take(need).collect();
3373 if pad_left {
3374 format!("{}{}", filler, s)
3375 } else {
3376 format!("{}{}", s, filler)
3377 }
3378 };
3379 state = match state {
3380 St::S(s) => St::S(pad_one(s)),
3381 St::A(a) => St::A(a.into_iter().map(pad_one).collect()),
3382 };
3383 }
3384 'j' | 's' => {
3385 // zsh syntax: `(j:sep:)` and `(s:sep:)` use the char
3386 // following the flag as the delimiter. The delimiter must
3387 // be a non-alphanumeric, non-underscore char so subsequent
3388 // flags (alphabetic) aren't accidentally swallowed —
3389 // `(jL)` should be `j` (no delim, default IFS) followed
3390 // by `L`, not `j` with delim `L`. Recognized delim chars
3391 // mirror what zsh allows: punctuation only. zsh subst.c
3392 // get_strarg also accepts matched bracket pairs:
3393 // `[`/`]`, `{`/`}`, `(`/`)`, `<`/`>`.
3394 let mut sep = String::new();
3395 if i < chars.len() && ZshrsHost::is_zsh_flag_delim(chars[i]) {
3396 let delim = chars[i];
3397 let close = match delim {
3398 '[' => ']',
3399 '{' => '}',
3400 '(' => ')',
3401 '<' => '>',
3402 c => c,
3403 };
3404 i += 1;
3405 while i < chars.len() && chars[i] != close {
3406 sep.push(chars[i]);
3407 i += 1;
3408 }
3409 if i < chars.len() {
3410 i += 1; // skip closing delim
3411 }
3412 } else if c == 'j' {
3413 // `j` with no delim → join with space (IFS-default).
3414 sep = " ".to_string();
3415 }
3416 // `(p)` print-escape interpretation per
3417 // src/zsh/Src/subst.c:2381-2382 — `\n`, `\t`,
3418 // `\xNN`, `\NNN` (octal) etc. become the actual
3419 // characters in the separator. Additionally,
3420 // (p) enables \$VAR / \${VAR} / \$(cmd) /
3421 // \$((expr)) expansion in the separator string
3422 // (zsh's parsestr+singsub treatment of get_strarg
3423 // results when the (p) flag is present). Without
3424 // (p), these stay literal — confirmed via
3425 // /opt/homebrew/bin/zsh -fc.
3426 if print_escapes && !sep.is_empty() {
3427 sep = print_escape_str(&sep);
3428 if sep.contains('$') || sep.contains('`') {
3429 sep = crate::ported::subst::singsub(&sep);
3430 }
3431 }
3432 if c == 'j' {
3433 state = match state {
3434 St::A(a) => St::S(a.join(&sep)),
3435 St::S(s) => St::S(s),
3436 };
3437 } else {
3438 // (s) splits both scalars and array elements per
3439 // zsh semantics. `(@s:,:)` runs `@` first which
3440 // wraps a scalar in a 1-elem array; `s` must
3441 // still split that element. Same goes for true
3442 // arrays — flat-map split each element.
3443 //
3444 // Empty-field handling — verified against zsh's
3445 // C source (utils.c sepsplit + subst.c around
3446 // line 3273). The actual rule is NOT "drop all
3447 // empties" but more nuanced:
3448 // - Boundary empties (leading or trailing
3449 // run of separators) collapse to ONE empty
3450 // each, regardless of how many separators.
3451 // - Middle empties (consecutive separators
3452 // between non-empties) drop ENTIRELY.
3453 // - `(@)` flag preserves all empties verbatim.
3454 // Examples (no @):
3455 // "a,,b,,c" → [a,b,c] (3)
3456 // ",a,b" → ["",a,b] (3)
3457 // "a,b," → [a,b,""] (3)
3458 // ",,a,,b,," → ["",a,b,""] (4)
3459 // "a,,,b" → [a,b] (2, 3 middle empties)
3460 let keep_empty = chars.contains(&'@');
3461 let collapse = |s: &str, sep: &str| -> Vec<String> {
3462 let parts: Vec<String> = s.split(sep).map(String::from).collect();
3463 if keep_empty {
3464 return parts;
3465 }
3466 // Find first and last non-empty positions.
3467 let first_nonempty = parts.iter().position(|p| !p.is_empty());
3468 let last_nonempty = parts.iter().rposition(|p| !p.is_empty());
3469 match (first_nonempty, last_nonempty) {
3470 (None, _) => {
3471 // All-empty input. Collapse to a
3472 // single empty if input had any
3473 // separator (parts.len() > 1) and
3474 // therefore had a "boundary";
3475 // empty input → empty output.
3476 if parts.len() > 1 {
3477 vec![String::new()]
3478 } else {
3479 Vec::new()
3480 }
3481 }
3482 (Some(fi), Some(li)) => {
3483 let mut out: Vec<String> = Vec::new();
3484 if fi > 0 {
3485 out.push(String::new());
3486 }
3487 // Push only non-empty middles; drop
3488 // every internal empty.
3489 for p in &parts[fi..=li] {
3490 if !p.is_empty() {
3491 out.push(p.clone());
3492 }
3493 }
3494 if li < parts.len() - 1 {
3495 out.push(String::new());
3496 }
3497 out
3498 }
3499 _ => parts,
3500 }
3501 };
3502 state = match state {
3503 St::S(s) if sep.is_empty() => {
3504 St::A(s.chars().map(|c| c.to_string()).collect())
3505 }
3506 St::S(s) => St::A(collapse(&s, sep.as_str())),
3507 St::A(a) => {
3508 let mut out: Vec<String> = Vec::with_capacity(a.len());
3509 for elem in a {
3510 if sep.is_empty() {
3511 for c in elem.chars() {
3512 out.push(c.to_string());
3513 }
3514 } else {
3515 out.extend(collapse(&elem, sep.as_str()));
3516 }
3517 }
3518 St::A(out)
3519 }
3520 };
3521 }
3522 }
3523 'f' => {
3524 // Suppress the split entirely in scalar-assignment
3525 // context per Src/subst.c:3902 ssub gate. The
3526 // value passes through unchanged (preserves
3527 // original `\n` separators in `y="${(f)x}"`).
3528 if !ssub_runtime {
3529 state = match state {
3530 St::S(s) => St::A(s.split('\n').map(String::from).collect()),
3531 St::A(a) => {
3532 // Same flat-map rule as (s): split each element.
3533 let mut out: Vec<String> = Vec::with_capacity(a.len());
3534 for elem in a {
3535 for line in elem.split('\n') {
3536 out.push(line.to_string());
3537 }
3538 }
3539 St::A(out)
3540 }
3541 };
3542 }
3543 }
3544 '0' => {
3545 // `(0)` — split on NUL byte. Direct port of
3546 // src/zsh/Src/subst.c:2292-2297 which sets `spsep`
3547 // to a meta-encoded NUL. We split on the literal
3548 // `\0` character. Same flat-map behaviour as `(f)`.
3549 // Same ssub gate.
3550 if !ssub_runtime { state = match state {
3551 St::S(s) => St::A(s.split('\0').map(String::from).collect()),
3552 St::A(a) => {
3553 let mut out: Vec<String> = Vec::with_capacity(a.len());
3554 for elem in a {
3555 for piece in elem.split('\0') {
3556 out.push(piece.to_string());
3557 }
3558 }
3559 St::A(out)
3560 }
3561 }; }
3562 }
3563 'F' => {
3564 // (F) — join array elements with newlines (mirror
3565 // of (j:\n:) but as a one-letter shorthand).
3566 state = match state {
3567 St::A(a) => St::S(a.join("\n")),
3568 s => s,
3569 };
3570 }
3571 'Q' => {
3572 // (Q) — full shell-quoting reversal. Direct port of
3573 // Src/utils.c::dequotestring which scans the entire
3574 // string, handling SQ-spans (`'…'`), DQ-spans
3575 // (`"…"`) with backslash escapes, and standalone
3576 // `\X` escapes — NOT just outer-bslashquote strip. The
3577 // canonical roundtrip is `(qq)` → `(Q)` for strings
3578 // containing single quotes: `(qq)` of `a'b` produces
3579 // `'a'\''b'` and `(Q)` must reverse the four
3580 // close/escape/open transitions to recover `a'b`.
3581 // Earlier outer-bslashquote-strip left `a'\''b` literal.
3582 let dequote = |s: &str| -> String {
3583 let mut out = String::with_capacity(s.len());
3584 let mut chars = s.chars().peekable();
3585 while let Some(c) = chars.next() {
3586 match c {
3587 '\\' => {
3588 if let Some(&nx) = chars.peek() {
3589 out.push(nx);
3590 chars.next();
3591 }
3592 }
3593 '\'' => {
3594 while let Some(&inner) = chars.peek() {
3595 chars.next();
3596 if inner == '\'' {
3597 break;
3598 }
3599 out.push(inner);
3600 }
3601 }
3602 '"' => {
3603 while let Some(&inner) = chars.peek() {
3604 chars.next();
3605 if inner == '"' {
3606 break;
3607 }
3608 if inner == '\\' {
3609 if let Some(&esc) = chars.peek() {
3610 out.push(esc);
3611 chars.next();
3612 continue;
3613 }
3614 }
3615 out.push(inner);
3616 }
3617 }
3618 _ => out.push(c),
3619 }
3620 }
3621 out
3622 };
3623 state = match state {
3624 St::S(s) => St::S(dequote(&s)),
3625 St::A(a) => St::A(a.into_iter().map(|s| dequote(&s)).collect()),
3626 };
3627 }
3628 'z' => {
3629 // (z) — split by shell-token rules: whitespace
3630 // boundaries, BUT also split out shell metacharacters
3631 // like `;`, `&`, `|`, `(`, `)`, `<`, `>` as their
3632 // own tokens. Honors single/double quotes (treat
3633 // contents as one token, strip outer quotes from
3634 // the result). Matches zsh's `(z)` flag.
3635 state = match state {
3636 St::S(s) => St::A(bufferwords_z_tuple(&s, 0).0),
3637 St::A(a) => St::A(a),
3638 };
3639 }
3640 'w' => {
3641 // (w) — count words; in the array sense, just split
3642 // on whitespace and let downstream consumers count.
3643 state = match state {
3644 St::S(s) => St::A(s.split_whitespace().map(String::from).collect()),
3645 St::A(a) => St::A(a),
3646 };
3647 }
3648 'o' | 'O' => {
3649 // Optional sub-flag: `n` numeric, `i` case-insensitive,
3650 // `a` array-order (i.e. don't sort, just reverse for O).
3651 // Also detect `n`/`i` BEFORE the `o`/`O` (zsh's
3652 // `(no)` and `(io)` shapes — order-agnostic).
3653 let sub = chars.get(i).copied();
3654 let consume = matches!(sub, Some('n') | Some('i') | Some('a'));
3655 if consume {
3656 i += 1;
3657 }
3658 // Look back: was `n` or `i` already in the flags
3659 // string before this `o`? zsh treats `(no)` same
3660 // as `(on)` — numeric sort applied to the
3661 // ascending order. Only relevant if no inline sub
3662 // was found.
3663 let sub = if consume {
3664 sub
3665 } else {
3666 let prefix = &chars[..i.saturating_sub(1)];
3667 if prefix.contains(&'n') {
3668 Some('n')
3669 } else if prefix.contains(&'i') {
3670 Some('i')
3671 } else {
3672 None
3673 }
3674 };
3675 let consume = consume || matches!(sub, Some('n') | Some('i') | Some('a'));
3676 let descending = c == 'O';
3677 state = match state {
3678 St::A(mut a) => {
3679 match sub {
3680 Some('a') if consume => {
3681 if descending {
3682 a.reverse();
3683 }
3684 // ascending + array-order = no-op
3685 }
3686 Some('n') if consume => {
3687 // Natural sort: compare by chunks of
3688 // digits-vs-non-digits so "file10"
3689 // sorts after "file2".
3690 a.sort_by(|x, y| {
3691 let cmp = crate::extensions::stringsort::natural_cmp(x, y);
3692 if descending {
3693 cmp.reverse()
3694 } else {
3695 cmp
3696 }
3697 });
3698 }
3699 Some('i') if consume => {
3700 a.sort_by(|x, y| {
3701 let xl = x.to_lowercase();
3702 let yl = y.to_lowercase();
3703 if descending {
3704 yl.cmp(&xl)
3705 } else {
3706 xl.cmp(&yl)
3707 }
3708 });
3709 }
3710 _ => {
3711 if descending {
3712 a.sort_by(|x, y| y.cmp(x));
3713 } else {
3714 a.sort();
3715 }
3716 }
3717 }
3718 St::A(a)
3719 }
3720 s => s,
3721 };
3722 }
3723 'u' => {
3724 // Unique: preserve first occurrence, drop later dupes.
3725 state = match state {
3726 St::A(a) => {
3727 let mut seen = std::collections::HashSet::new();
3728 let unique: Vec<String> =
3729 a.into_iter().filter(|s| seen.insert(s.clone())).collect();
3730 St::A(unique)
3731 }
3732 s => s,
3733 };
3734 }
3735 'C' => {
3736 // `(C)` — capitalize. Direct port of
3737 // src/zsh/Src/hist.c:2239-2256 CASMOD_CAPS via
3738 // crate::ported::hist::casemodify. Treats any non-
3739 // alphanumeric (including punctuation, control
3740 // chars, NOT just whitespace) as a word boundary
3741 // and lowercases mid-word uppercase letters.
3742 state = match state {
3743 St::S(s) => {
3744 St::S(crate::ported::hist::casemodify(&s, crate::ported::hist::CASMOD_CAPS))
3745 }
3746 St::A(a) => St::A(
3747 a.into_iter()
3748 .map(|s| crate::ported::hist::casemodify(&s, crate::ported::hist::CASMOD_CAPS))
3749 .collect(),
3750 ),
3751 };
3752 }
3753 'V' => {
3754 // Make non-printable characters visible. zsh:
3755 // `^X` for control chars (X = char + 64); `\M-X`
3756 // for high-bit chars; backslash escapes for
3757 // common forms (\n, \t, \r). zshrs's separate
3758 // ZshParamFlag::Visible path implements this for
3759 // the multi-flag dispatcher, but the inline state
3760 // machine had no `V` arm so `${(V)x}` left
3761 // control chars raw.
3762 let visible = |s: &str| -> String {
3763 let mut out = String::with_capacity(s.len());
3764 for c in s.chars() {
3765 match c {
3766 '\n' => out.push_str("\\n"),
3767 '\t' => out.push_str("\\t"),
3768 '\r' => out.push_str("\\r"),
3769 c if c.is_control() => {
3770 out.push('^');
3771 out.push((c as u8 + 64) as char);
3772 }
3773 _ => out.push(c),
3774 }
3775 }
3776 out
3777 };
3778 state = match state {
3779 St::S(s) => St::S(visible(&s)),
3780 St::A(a) => St::A(a.into_iter().map(|s| visible(&s)).collect()),
3781 };
3782 }
3783 'D' => {
3784 // (D) named-directory substitution per
3785 // Src/subst.c:4155 (`mods & 1`) → substnamedir.
3786 // Replace $HOME prefix with `~` and any longer
3787 // named-dir match with `~name`. Per-element on
3788 // arrays, longest-prefix-first to avoid shallow
3789 // shadowing (a `~zpwr=/Users/wizard/zpwr`
3790 // override beats the bare `~=/Users/wizard`).
3791 let render_d = |s: &str| -> String {
3792 with_executor(|_exec| {
3793 let mut out = s.to_string();
3794 // First the longer named dirs.
3795 let mut entries: Vec<(String, std::path::PathBuf)> =
3796 crate::ported::hashnameddir::nameddirtab()
3797 .lock().ok()
3798 .map(|g| g.iter()
3799 .map(|(k, nd)| (k.clone(), std::path::PathBuf::from(&nd.dir)))
3800 .collect())
3801 .unwrap_or_default();
3802 entries.sort_by_key(|(_, p)| std::cmp::Reverse(p.as_os_str().len()));
3803 for (name, path) in &entries {
3804 let path_s = path.to_string_lossy();
3805 if !path_s.is_empty() && out.starts_with(path_s.as_ref()) {
3806 return format!(
3807 "~{}{}",
3808 name,
3809 &out[path_s.len()..]
3810 );
3811 }
3812 }
3813 // Then $HOME — only if no named-dir matched.
3814 if let Some(home) = crate::ported::params::getsparam("HOME") {
3815 if !home.is_empty() && out.starts_with(&home) {
3816 out = format!("~{}", &out[home.len()..]);
3817 }
3818 } else if let Ok(home) = std::env::var("HOME") {
3819 if !home.is_empty() && out.starts_with(&home) {
3820 out = format!("~{}", &out[home.len()..]);
3821 }
3822 }
3823 out
3824 })
3825 };
3826 state = match state {
3827 St::S(s) => St::S(render_d(&s)),
3828 St::A(a) => St::A(a.into_iter().map(|s| render_d(&s)).collect()),
3829 };
3830 }
3831 'P' => {
3832 // (P) was already applied as the pre-walker
3833 // initial-state transform — see `want_indirect`
3834 // above. The walker pass is a no-op for P.
3835 state = match state {
3836 St::S(s) => St::S(s),
3837 St::A(a) => St::A(a),
3838 };
3839 }
3840 '@' => {
3841 // Force array shape (scalar → 1-elem array).
3842 state = match state {
3843 St::S(s) => St::A(vec![s]),
3844 a => a,
3845 };
3846 }
3847 'k' => {
3848 // Keys of assoc. If immediately followed by 'v' (or
3849 // earlier state was already 'v'-set), interleave key/value
3850 // pairs (zsh's `(kv)` form). For regular arrays zsh
3851 // returns the values themselves (a quirk: docs say
3852 // "integer subscripts" but the actual implementation
3853 // returns array contents — verified against /bin/zsh).
3854 if i < chars.len() && chars[i] == 'v' {
3855 i += 1; // consume the 'v'
3856 let pairs = with_executor(|exec| {
3857 if let Some(m) = exec.assoc(&name) {
3858 let mut out = Vec::with_capacity(m.len() * 2);
3859 for (k, v) in m {
3860 out.push(k.clone());
3861 out.push(v.clone());
3862 }
3863 out
3864 } else if let Some(arr) = exec.array(&name) {
3865 arr
3866 } else {
3867 // Magic-assoc fallback for (kv): emit
3868 // alternating [key, value] pairs by
3869 // pairing magic_assoc_keys with
3870 // get_special_array_value lookups.
3871 if let Some(keys) = crate::exec::scan_magic_assoc_keys(&name) {
3872 let mut out = Vec::with_capacity(keys.len() * 2);
3873 for k in keys {
3874 let v = exec
3875 .get_special_array_value(&name, &k)
3876 .unwrap_or_default();
3877 out.push(k);
3878 out.push(v);
3879 }
3880 out
3881 } else {
3882 Vec::new()
3883 }
3884 }
3885 });
3886 state = St::A(pairs);
3887 } else {
3888 let keys = with_executor(|exec| {
3889 if let Some(m) = exec.assoc(&name) {
3890 m.keys().cloned().collect::<Vec<_>>()
3891 } else if let Some(arr) = exec.array(&name) {
3892 // zsh quirk: `(k)` on a regular array
3893 // returns the array values themselves.
3894 arr
3895 } else {
3896 // `${(k)<magic-assoc>}` — names like
3897 // `aliases`, `functions`, `options`,
3898 // `commands`, `terminfo`, `errnos`,
3899 // etc. Direct port of the per-special
3900 // scanfn dispatch (Src/Modules/
3901 // parameter.c et al.). Returns the
3902 // sorted key set the C source builds
3903 // by walking each magic table.
3904 crate::exec::scan_magic_assoc_keys(&name)
3905 .unwrap_or_default()
3906 }
3907 });
3908 state = St::A(keys);
3909 }
3910 }
3911 'v' => {
3912 // Values of assoc. If immediately followed by 'k',
3913 // interleave value/key pairs (zsh's `(vk)` form, less
3914 // common than `(kv)` but supported for symmetry).
3915 // Magic-assoc fallback when name isn't in
3916 // assoc_arrays (`aliases`, `functions`, `commands`,
3917 // `options`, `parameters`, `terminfo`, `errnos`,
3918 // `sysparams`) — synthesize the value list from the
3919 // executor's get_special_array_value scanfn-equivalent.
3920 if i < chars.len() && chars[i] == 'k' {
3921 i += 1; // consume the 'k'
3922 let pairs = with_executor(|exec| {
3923 if let Some(m) = exec.assoc(&name) {
3924 let mut out = Vec::with_capacity(m.len() * 2);
3925 for (k, v) in m {
3926 out.push(v.clone());
3927 out.push(k.clone());
3928 }
3929 out
3930 } else if let Some(keys) =
3931 crate::exec::scan_magic_assoc_keys(&name)
3932 {
3933 let mut out = Vec::with_capacity(keys.len() * 2);
3934 for k in keys {
3935 let v = exec
3936 .get_special_array_value(&name, &k)
3937 .unwrap_or_default();
3938 out.push(v);
3939 out.push(k);
3940 }
3941 out
3942 } else {
3943 Vec::new()
3944 }
3945 });
3946 state = St::A(pairs);
3947 } else {
3948 let vals = with_executor(|exec| {
3949 if let Some(m) = exec.assoc(&name) {
3950 m.values().cloned().collect::<Vec<_>>()
3951 } else if let Some(keys) =
3952 crate::exec::scan_magic_assoc_keys(&name)
3953 {
3954 keys.iter()
3955 .map(|k| {
3956 exec.get_special_array_value(&name, k)
3957 .unwrap_or_default()
3958 })
3959 .collect()
3960 } else {
3961 Vec::new()
3962 }
3963 });
3964 state = St::A(vals);
3965 }
3966 }
3967 '#' => {
3968 state = match state {
3969 St::A(a) => St::S(a.len().to_string()),
3970 St::S(s) => St::S(s.len().to_string()),
3971 };
3972 }
3973 'q' => {
3974 // (q) quoting flag — direct port of `case 'q':` in
3975 // Src/subst.c:2235-2253. zsh accepts ONLY:
3976 // q backslash-escape (QT_BACKSLASH)
3977 // qq single-bslashquote (QT_SINGLE)
3978 // qqq double-bslashquote (QT_DOUBLE)
3979 // qqqq $'…' ANSI-C (QT_DOLLARS)
3980 // q- QT_SINGLE_OPTIONAL (single-bslashquote if needed)
3981 // q+ QT_QUOTEDZPUTS (quotedzputs() format)
3982 // No `q*`, no `q!`, and crucially no `q:str:` delimiter
3983 // form — those were bot-invented extensions. The
3984 // `q:str:` arm in particular treated `@` as a delimiter
3985 // (since `@` is non-alphanumeric so `is_zsh_flag_delim`
3986 // returned true), capturing `explicit_delim=Some("")`
3987 // and then `s.replace("", "\\")` inserted `\` between
3988 // every char. That broke `${(qqqq@)arr}` and any other
3989 // q-flag combined with a flag-letter that's also non-
3990 // alphanumeric. Reference: zsh has no q-delimiter form.
3991 let mut level = 1;
3992 while i < chars.len() && chars[i] == 'q' && level < 4 {
3993 level += 1;
3994 i += 1;
3995 }
3996 let mut strip_trailing_newlines = false;
3997 let mut wrap_only_if_needed = false;
3998 let escape_glob_chars = false; // c:2235 (no q* in zsh)
3999 let explicit_delim: Option<String> = None; // c:2235 (no q:str: in zsh)
4000 while i < chars.len() {
4001 match chars[i] {
4002 '+' => {
4003 // c:2245-2246 — q+ → QT_QUOTEDZPUTS. Mapped
4004 // to wrap-only-if-needed pending a faithful
4005 // QT_QUOTEDZPUTS port.
4006 wrap_only_if_needed = true;
4007 i += 1;
4008 }
4009 '-' => {
4010 // c:2245-2246 — q- → QT_SINGLE_OPTIONAL.
4011 // Currently mapped to strip_trailing_newlines
4012 // pending a faithful QT_SINGLE_OPTIONAL port.
4013 strip_trailing_newlines = true;
4014 i += 1;
4015 }
4016 _ => break,
4017 }
4018 }
4019 let needs_quoting = |s: &str| -> bool {
4020 s.is_empty()
4021 || s.chars().any(|c| {
4022 c.is_whitespace()
4023 || matches!(
4024 c,
4025 '\'' | '"'
4026 | '\\'
4027 | '$'
4028 | '`'
4029 | '*'
4030 | '?'
4031 | '['
4032 | ']'
4033 | '{'
4034 | '}'
4035 | '('
4036 | ')'
4037 | '|'
4038 | '&'
4039 | ';'
4040 | '<'
4041 | '>'
4042 | '#'
4043 | '~'
4044 )
4045 })
4046 };
4047 let quote_one = |raw: &str| -> String {
4048 let s_owned: String;
4049 let s = if strip_trailing_newlines {
4050 s_owned = raw.trim_end_matches('\n').to_string();
4051 s_owned.as_str()
4052 } else {
4053 raw
4054 };
4055 if wrap_only_if_needed {
4056 // q+: skip quoting if the value is "shell-safe";
4057 // otherwise wrap with single-quotes (zsh's q+
4058 // promotes to single-bslashquote level when needed).
4059 if !needs_quoting(s) {
4060 return s.to_string();
4061 }
4062 return format!("'{}'", s.replace('\'', "'\\''"));
4063 }
4064 if let Some(ref d) = explicit_delim {
4065 // q:str: form — wrap value with the explicit
4066 // delimiter on each side, escaping inner d's
4067 // with backslash.
4068 let escaped = s.replace(d.as_str(), &format!("\\{}", d));
4069 return format!("{}{}{}", d, escaped, d);
4070 }
4071 match level {
4072 1 => {
4073 // q: backslash-escape every shell-special
4074 // char without surrounding quotes. zsh
4075 // special-cases the empty string: `${(q)x}`
4076 // for empty `x` outputs `''` (a real
4077 // single-quoted empty pair) so the
4078 // value survives word-splitting in the
4079 // consumer.
4080 if s.is_empty() {
4081 return "''".to_string();
4082 }
4083 let mut out = String::with_capacity(s.len() + 4);
4084 for c in s.chars() {
4085 if matches!(
4086 c,
4087 ' ' | '\t'
4088 | '\''
4089 | '"'
4090 | '\\'
4091 | '$'
4092 | '`'
4093 | '*'
4094 | '?'
4095 | '['
4096 | ']'
4097 | '{'
4098 | '}'
4099 | '('
4100 | ')'
4101 | '|'
4102 | '&'
4103 | ';'
4104 | '<'
4105 | '>'
4106 | '#'
4107 | '~'
4108 ) {
4109 out.push('\\');
4110 }
4111 out.push(c);
4112 }
4113 out
4114 }
4115 2 => {
4116 // qq: single-bslashquote, escape inner ' as '\''.
4117 let mut escaped = s.replace('\'', "'\\''");
4118 if escape_glob_chars {
4119 escaped = escaped.replace('*', "\\*").replace('?', "\\?");
4120 }
4121 format!("'{}'", escaped)
4122 }
4123 3 => {
4124 // qqq: double-bslashquote, escape $ ` " \\.
4125 let mut out = String::with_capacity(s.len() + 2);
4126 out.push('"');
4127 for c in s.chars() {
4128 match c {
4129 '$' | '`' | '"' | '\\' => {
4130 out.push('\\');
4131 out.push(c);
4132 }
4133 '*' | '?' if escape_glob_chars => {
4134 out.push('\\');
4135 out.push(c);
4136 }
4137 _ => out.push(c),
4138 }
4139 }
4140 out.push('"');
4141 out
4142 }
4143 _ => {
4144 // qqqq: ANSI-C $'…' style.
4145 let mut out = String::with_capacity(s.len() + 4);
4146 out.push_str("$'");
4147 for c in s.chars() {
4148 match c {
4149 '\\' => out.push_str("\\\\"),
4150 '\'' => out.push_str("\\'"),
4151 '\n' => out.push_str("\\n"),
4152 '\t' => out.push_str("\\t"),
4153 '\r' => out.push_str("\\r"),
4154 c if (c as u32) < 0x20 => {
4155 out.push_str(&format!("\\x{:02x}", c as u32));
4156 }
4157 c => out.push(c),
4158 }
4159 }
4160 out.push('\'');
4161 out
4162 }
4163 }
4164 };
4165 state = match state {
4166 St::S(s) => St::S(quote_one(&s)),
4167 St::A(a) => {
4168 // Empty array under `(q)`/`(qq)` flag emits a
4169 // single empty quoted pair (`''`) — zsh treats
4170 // the empty array as `[""]` for quoting so the
4171 // result still occupies a slot. Without this
4172 // special case, `${(qq)a}` for an empty `a`
4173 // produced an actually-empty string.
4174 if a.is_empty() {
4175 St::A(vec![quote_one("")])
4176 } else {
4177 St::A(a.into_iter().map(|s| quote_one(&s)).collect())
4178 }
4179 }
4180 };
4181 }
4182 'g' => {
4183 // Process backslash escapes (`\n`, `\t`, `\r`, `\\`,
4184 // `\xNN`, `\NNN` octal). Applied to the current scalar
4185 // or each array element.
4186 let unescape = |s: &str| -> String {
4187 let mut out = String::with_capacity(s.len());
4188 let mut chars = s.chars().peekable();
4189 while let Some(c) = chars.next() {
4190 if c != '\\' {
4191 out.push(c);
4192 continue;
4193 }
4194 match chars.next() {
4195 Some('n') => out.push('\n'),
4196 Some('t') => out.push('\t'),
4197 Some('r') => out.push('\r'),
4198 Some('\\') => out.push('\\'),
4199 Some('\'') => out.push('\''),
4200 Some('"') => out.push('"'),
4201 Some('0') => out.push('\0'),
4202 Some('a') => out.push('\x07'),
4203 Some('b') => out.push('\x08'),
4204 Some('f') => out.push('\x0c'),
4205 Some('v') => out.push('\x0b'),
4206 Some('x') => {
4207 let mut hex = String::new();
4208 for _ in 0..2 {
4209 if let Some(&h) = chars.peek() {
4210 if h.is_ascii_hexdigit() {
4211 hex.push(h);
4212 chars.next();
4213 } else {
4214 break;
4215 }
4216 }
4217 }
4218 if let Ok(b) = u8::from_str_radix(&hex, 16) {
4219 out.push(b as char);
4220 }
4221 }
4222 Some(other) => {
4223 out.push('\\');
4224 out.push(other);
4225 }
4226 None => out.push('\\'),
4227 }
4228 }
4229 out
4230 };
4231 state = match state {
4232 St::S(s) => St::S(unescape(&s)),
4233 St::A(a) => St::A(a.into_iter().map(|s| unescape(&s)).collect()),
4234 };
4235 }
4236 'n' => {
4237 // Numeric sort. Direct port of src/zsh/Src/sort.c:137-172
4238 // (eltpcmp's `if (sortnumeric)` block) and subst.c:2217
4239 // (case 'n' sets SORTIT_NUMERICALLY).
4240 //
4241 // Two flavors per zsh — controlled by sortnumeric value:
4242 // 1 (positive) — unsigned. A leading `-` is just
4243 // another non-digit char and is
4244 // compared lexicographically. (n)
4245 // alone takes this path.
4246 // -1 (negative) — signed. A `-` immediately preceding
4247 // digits flips the comparison so that
4248 // `-5 < -3 < 1`. Triggered by the
4249 // `-` flag char per subst.c:2220-2222
4250 // (case '-': sortit |= NUMERICALLY_SIGNED).
4251 //
4252 // We pre-scan the flag string for a literal `-` after
4253 // the `n` to enable signed mode. This matches the order-
4254 // independent behavior of zsh's flag dispatch (any
4255 // `-` in the (...) group enables signed mode for the
4256 // numeric sort).
4257 let signed = chars.contains(&'-');
// Ordering used by the `(n)` numeric-sort flag.
//
// * `signed == true`: when BOTH operands are plain signed integers
//   (optional leading `-`/`+` directly followed by digits, nothing
//   else, and small enough for `i128`) they are compared by value,
//   so `-5 < -3 < 1`. Anything else falls through to the unsigned
//   walk below.
// * Unsigned walk: the two strings are scanned in lock-step. When
//   both cursors sit on an ASCII digit the complete digit runs are
//   compared numerically; otherwise single chars are compared by
//   code point. A string that is a strict prefix of the other sorts
//   first.
//
// Digit runs are compared by magnitude WITHOUT converting them to a
// machine integer: leading zeros are dropped, then a longer run is
// the larger number, and equal-length runs compare digit by digit.
// The previous `parse::<u128>().unwrap_or(0)` treated every run that
// overflowed u128 as 0, which made huge numbers sort before `1` and
// made distinct huge numbers compare equal.
fn natural_cmp(a: &str, b: &str, signed: bool) -> std::cmp::Ordering {
    // Consume and return the maximal run of ASCII digits at the cursor.
    fn digit_run(it: &mut std::iter::Peekable<std::str::Chars<'_>>) -> String {
        let mut run = String::new();
        while let Some(d) = it.next_if(char::is_ascii_digit) {
            run.push(d);
        }
        run
    }

    if signed {
        // Accepts `-N`, `+N` or `N`; returns None for anything that is
        // not entirely an integer or that does not fit in i128.
        let parse_signed = |s: &str| -> Option<i128> {
            let bytes = s.as_bytes();
            if bytes.is_empty() {
                return None;
            }
            let (neg, rest) = match bytes[0] {
                b'-' if bytes.len() > 1 && bytes[1].is_ascii_digit() => (true, &s[1..]),
                b'+' if bytes.len() > 1 && bytes[1].is_ascii_digit() => (false, &s[1..]),
                c if c.is_ascii_digit() => (false, s),
                _ => return None,
            };
            rest.parse::<i128>().ok().map(|n| if neg { -n } else { n })
        };
        if let (Some(va), Some(vb)) = (parse_signed(a), parse_signed(b)) {
            return va.cmp(&vb);
        }
        // Not both plain integers: use the unsigned walk.
    }

    let mut ai = a.chars().peekable();
    let mut bi = b.chars().peekable();
    loop {
        match (ai.peek(), bi.peek()) {
            (None, None) => return Ordering::Equal,
            (None, _) => return Ordering::Less,
            (_, None) => return Ordering::Greater,
            (Some(ca), Some(cb)) if ca.is_ascii_digit() && cb.is_ascii_digit() => {
                let na = digit_run(&mut ai);
                let nb = digit_run(&mut bi);
                // Magnitude compare that cannot overflow: after the
                // leading zeros are gone, more digits means larger,
                // and equal lengths order like the digit strings.
                let ta = na.trim_start_matches('0');
                let tb = nb.trim_start_matches('0');
                match ta.len().cmp(&tb.len()).then_with(|| ta.cmp(tb)) {
                    Ordering::Equal => continue,
                    ord => return ord,
                }
            }
            (Some(&ca), Some(&cb)) => {
                ai.next();
                bi.next();
                match ca.cmp(&cb) {
                    Ordering::Equal => continue,
                    ord => return ord,
                }
            }
        }
    }
}
4331 state = match state {
4332 St::A(mut a) => {
4333 a.sort_by(|x, y| natural_cmp(x, y, signed));
4334 St::A(a)
4335 }
4336 s => s,
4337 };
4338 }
4339 '-' => {
4340 // `(-)` — signed-numeric sort modifier per
4341 // src/zsh/Src/subst.c:2220-2222. The actual sort
4342 // happens in the `n` arm above; this arm just
4343 // consumes the flag char so unrecognized-flag
4344 // paths don't trip on it.
4345 }
4346 'i' => {
4347 // Case-insensitive sort. Re-applies sort using lowercase
4348 // comparison; if the array isn't sorted, this is the
4349 // sort-key.
4350 state = match state {
4351 St::A(mut a) => {
4352 a.sort_by_key(|x| x.to_lowercase());
4353 St::A(a)
4354 }
4355 s => s,
4356 };
4357 }
4358 't' => {
4359 // Type query. zsh's `(t)` flag returns the base
4360 // type plus any attribute markers separated by `-`.
4361 // Examples: `integer`, `float`, `scalar-readonly`,
4362 // `scalar-export`, `scalar-left` (typeset -L N),
4363 // `scalar-right_blanks`, `array`, `association`.
4364 //
4365 // `(Pt)` combo: direct port of Src/subst.c:2807-2854.
4366 // zsh's `wantt` reads `v->pm->node.flags` AFTER
4367 // `aspar` has resolved the indirect target's Param.
4368 // We mirror that: for (Pt), look up `name`'s scalar
4369 // value to get the target name, then introspect
4370 // THAT parameter's type. The value pre-walker was
4371 // skipped above for the Pt combo.
4372 let target = if pt_combo {
4373 with_executor(|exec| exec.get_variable(&name))
4374 } else {
4375 name.clone()
4376 };
4377 let kind = with_executor(|exec| {
4378 // Delegate to the canonical (t)-flag formatter
4379 // which reads PM_TYPE flags from paramtab. The
4380 // exec.rs "parameters" arm of get_special_array
4381 // _value handles the same PM_INTEGER / PM_FFLOAT
4382 // / PM_LOWER / PM_READONLY flag dispatch.
4383 exec.get_special_array_value("parameters", &target)
4384 .unwrap_or_default()
4385 });
4386 state = St::S(kind);
4387 }
4388 '%' => {
4389 // Prompt expansion: process %F %B %f %{ %} etc. via the
4390 // executor's expand_prompt. Useful for building prompts
4391 // out of stored fragments.
4392 state = match state {
4393 St::S(s) => St::S(with_executor(|exec| exec.expand_prompt_string(&s))),
4394 St::A(a) => St::A(
4395 a.into_iter()
4396 .map(|s| with_executor(|exec| exec.expand_prompt_string(&s)))
4397 .collect(),
4398 ),
4399 };
4400 }
4401 'e' => {
4402 // Per zshexpn(1): "perform parameter expansion,
4403 // command substitution and arithmetic expansion
4404 // on the resulting word". Apply expand_string so
4405 // `\$var` (literal `$var` in the value) becomes
4406 // the value of $var, `\$(cmd)` runs the cmd, etc.
4407 let eval_one =
4408 |s: &str| -> String { crate::ported::subst::singsub(s) };
4409 state = match state {
4410 St::S(s) => St::S(eval_one(&s)),
4411 St::A(a) => St::A(a.into_iter().map(|s| eval_one(&s)).collect()),
4412 };
4413 }
4414 'p' => {
4415 // Print-style escape processing (mirrors print -e). Same
4416 // as `g` for the escape set we support — they differ in
4417 // zsh on some niche `\c` and `\E` forms, which we map
4418 // identically.
4419 let unescape = |s: &str| -> String {
4420 let mut out = String::with_capacity(s.len());
4421 let mut chars = s.chars().peekable();
4422 while let Some(c) = chars.next() {
4423 if c != '\\' {
4424 out.push(c);
4425 continue;
4426 }
4427 match chars.next() {
4428 Some('n') => out.push('\n'),
4429 Some('t') => out.push('\t'),
4430 Some('r') => out.push('\r'),
4431 Some('\\') => out.push('\\'),
4432 Some('e') | Some('E') => out.push('\x1b'),
4433 Some(other) => {
4434 out.push('\\');
4435 out.push(other);
4436 }
4437 None => out.push('\\'),
4438 }
4439 }
4440 out
4441 };
4442 state = match state {
4443 St::S(s) => St::S(unescape(&s)),
4444 St::A(a) => St::A(a.into_iter().map(|s| unescape(&s)).collect()),
4445 };
4446 }
4447 'A' => {
4448 // Coerce to array shape (alias of @). Mostly affects
4449 // downstream flags that treat scalar vs array
4450 // differently.
4451 state = match state {
4452 St::S(s) => St::A(vec![s]),
4453 a => a,
4454 };
4455 }
4456 '~' => {
4457 // Pattern-toggle: in zsh this enables glob-pattern
4458 // interpretation of the value in subsequent matches. The
4459 // bytecode dispatch already glob-matches via `Op::StrMatch`
4460 // when relevant; without a stateful match-context this
4461 // flag is a no-op pass-through. tracing::debug records
4462 // the request.
4463 tracing::debug!("PARAM_FLAG ~ — no-op pass-through (no match-context state)");
4464 }
4465 'p' => {
4466 // `(p)` — print-style escapes for OTHER flag args.
4467 // Already detected by the pre-scan above; here we
4468 // just consume the flag char without mutating
4469 // state (no-op on the value itself). Matches
4470 // src/zsh/Src/subst.c:2381-2382.
4471 }
4472 'g' => {
4473 // `(g)` — apply print-style escape decoding to
4474 // the operand value itself, with sub-flags
4475 // selecting which escape conventions to honor.
4476 // Sub-flags from src/zsh/Src/subst.c:2409-2436:
4477 // e — emacs-style: \C-x, \M-x, \e
4478 // o — octal: \NNN
4479 // c — caret notation: ^X for control chars
4480 // We honor any combination by running the same
4481 // C-style interpreter that `(p)` uses on `(s::)`
4482 // args; sub-flags currently widen but do not
4483 // narrow the escape set.
4484 if i < chars.len() && ZshrsHost::is_zsh_flag_delim(chars[i]) {
4485 let d = chars[i];
4486 i += 1;
4487 // Consume the sub-flag chars (e/o/c) — recorded
4488 // for documentation; the escape interpreter
4489 // below already handles all three cases.
4490 while i < chars.len() && chars[i] != d {
4491 i += 1;
4492 }
4493 if i < chars.len() {
4494 i += 1; // skip closing delim
4495 }
4496 }
4497 state = match state {
4498 St::S(s) => St::S(print_escape_str(&s)),
4499 St::A(a) => St::A(a.into_iter().map(|s| print_escape_str(&s)).collect()),
4500 };
4501 }
4502 '_' => {
4503 // `(_)` — reserved for future use per
4504 // src/zsh/Src/subst.c:2485-2502. Consume the
4505 // delim-bracketed arg if present so we don't
4506 // mis-parse subsequent flags.
4507 if i < chars.len() && ZshrsHost::is_zsh_flag_delim(chars[i]) {
4508 let d = chars[i];
4509 i += 1;
4510 while i < chars.len() && chars[i] != d {
4511 i += 1;
4512 }
4513 if i < chars.len() {
4514 i += 1;
4515 }
4516 }
4517 }
4518 'b' | 'B' => {
4519 // (b)/(B) — backslash-escape shell + pattern metas
4520 // (whitespace, glob/redirect/bslashquote/expansion specials).
4521 let escape = |s: &str| -> String {
4522 let mut r = String::new();
4523 for c in s.chars() {
4524 if "\\*?[]{}()<>&|;\"'$`!#~ \t\n".contains(c) {
4525 r.push('\\');
4526 }
4527 r.push(c);
4528 }
4529 r
4530 };
4531 state = match state {
4532 St::S(s) => St::S(escape(&s)),
4533 St::A(a) => St::A(a.iter().map(|s| escape(s)).collect()),
4534 };
4535 }
4536 _ => {
4537 // Unknown flag — silently skip. The maintainer's "no
4538 // friendly nags" rule means we don't print "unsupported
4539 // flag X"; tracing::debug records it in the log.
4540 tracing::debug!(flag = %c, "BUILTIN_PARAM_FLAG: unknown flag");
4541 }
4542 }
4543 }
4544
4545 // Direct port of Src/subst.c:3901-3933. When the caller is in
4546 // DQ context AND the state landed in `St::A` (e.g. via `(f)`
4547 // line-split, `(s:…:)` arbitrary split, or assoc/array seed
4548 // with no `[@]` splice), zsh's paramsubst joins the array back
4549 // into a single scalar via `sepjoin(aval, sep, 1)`:
4550 //
4551 // • If `sep` is non-NULL (set by `(F)` / `(j:…:)`), join
4552 // with that exact separator.
4553 // • Else if `spsep` is non-NULL (set by `(f)` / `(s:…:)`),
4554 // `sepjoin` falls back to the first IFS char (space by
4555 // default for `IFS=$' \t\n'`).
4556 //
4557 // Without this, `echo "[${(f)x}]"` (DQ) would word-split the
4558 // array into 3 separate echo args (`[line1] [line2] [line3]`)
4559 // instead of zsh's `[line1 line2 line3]`. The explicit `[@]`
4560 // splice operator OR `(@)` flag suppresses this collapse —
4561 // both already covered by `has_at_subscript` above.
4562 //
4563 // Skip the collapse when nested inside ANOTHER `${...}` —
4564 // `${${(f)x}[2]}` needs the inner `(f)` to keep its array
4565 // shape so the outer `[2]` can subscript element-2. C zsh
4566 // tracks this through paramsubst's recursion (the inner call
4567 // returns aval; outer operates on aval before any sepjoin).
4568 // We detect the same condition via `in_paramsubst_nest`,
4569 // bumped by every BUILTIN_PARAM_FLAG / BUILTIN_PARAM_*
4570 // recursion entry.
4571 // The DQ collapse fires only for "bare" arrays — those that
4572 // came from `${arr}` / `${assoc}` without a split flag. When
4573 // any split flag (`(z)`, `(f)`, `(s:STR:)`, `(0)`, `(=)`) was
4574 // applied the array shape is INTENTIONAL: zsh keeps it
4575 // multi-word inside DQ. Direct port of Src/subst.c's
4576 // `nojoin` behavior — the split flags set nojoin=1 which
4577 // causes paramsubst to skip sepjoin even in DQ.
4578 let split_flag_active = flags.contains('z')
4579 || flags.contains('f')
4580 || flags.contains('s')
4581 || flags.contains('0')
4582 || flags.contains('=');
4583 // Canonical paramsubst-nest counter — `IN_PARAMSUBST_NEST`
4584 // thread_local in `subst.rs` (mirrors `paramsub_nest` global
4585 // in `Src/subst.c`).
4586 let is_nested = crate::ported::subst::IN_PARAMSUBST_NEST
4587 .with(|c| c.get() > 1);
4588 if (dq_compile || dq_runtime) && !has_at_subscript && !is_nested && !split_flag_active {
4589 if let St::A(a) = state {
4590 // Pick the join separator. `(F)` (the last F seen) is
4591 // tracked via `flags.contains('F')`; `(j:str:)` runs
4592 // earlier in the loop and stores the result already
4593 // joined as `St::S(_)`, so we only see `St::A` here
4594 // for split-style flags. The default is the first
4595 // char of $IFS (space when IFS is the zsh default).
4596 let sep = if flags.contains('F') {
4597 "\n".to_string()
4598 } else {
4599 with_executor(|exec| {
4600 let ifs = exec.get_variable("IFS");
4601 ifs.chars().next().map(|c| c.to_string()).unwrap_or_else(|| " ".to_string())
4602 })
4603 };
4604 return Value::str(a.join(&sep));
4605 }
4606 }
4607
4608 match state {
4609 St::S(s) => Value::str(s),
4610 St::A(a) => Value::Array(a.into_iter().map(Value::str).collect()),
4611 }
4612 });
4613
4614 // `foo[key]=val` — single-key set on an assoc array. Stack: [name, key, value].
4615 vm.register_builtin(BUILTIN_SET_ASSOC, |vm, _argc| {
4616 let value = vm.pop().to_str();
4617 let key = vm.pop().to_str();
4618 let name = vm.pop().to_str();
4619 with_executor(|exec| {
4620 // PFA-SMR aspect: subscript assignment `arr[N]=val` /
4621 // `assoc[key]=val`. Recorded as a structured assoc/array
4622 // event with the (key, value) pair preserved in
4623 // `value_assoc` so replay can reconstruct the exact slot.
4624 // Path-family arrays come through SET_ARRAY / APPEND_ARRAY,
4625 // never here, so no path_mod routing.
4626 #[cfg(feature = "recorder")]
4627 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
4628 let ctx = exec.recorder_ctx();
4629 let attrs = exec.recorder_attrs_for(&name);
4630 crate::recorder::emit_assoc_assign(
4631 &name,
4632 vec![(key.clone(), value.clone())],
4633 attrs,
4634 true, // element-add semantics, not full replace
4635 ctx,
4636 );
4637 }
4638 // Indexed array element assign `a[N]=val`. Routes here when
4639 // `name` is already an indexed array. For unset names, only
4640 // treat as indexed if the key is unambiguously numeric (a
4641 // literal int) — `foo[key]=val` with no prior storage and
4642 // a string key should create an assoc (zsh default), not an
4643 // indexed array. zsh's rule: numeric subscript on an
4644 // indexed array (or new var with numeric key) assigns to
4645 // the 1-based slot, growing the array if needed. Negative
4646 // indices count from the end.
4647 let is_indexed = exec.array(&name).is_some();
4648 let is_assoc = exec.assoc(&name).is_some();
4649 let key_literal_int = key.trim().parse::<i64>().ok();
4650 // For an existing indexed array, fall back to arith eval so
4651 // `a[i+1]=v` works when `i` is set.
4652 let key_int_for_indexed = if is_indexed {
4653 key_literal_int.or_else(|| Some(crate::ported::math::mathevali(&crate::ported::subst::singsub(&key)).unwrap_or(0)))
4654 } else {
4655 key_literal_int
4656 };
4657 let route_indexed = if is_assoc {
4658 false
4659 } else if is_indexed {
4660 key_int_for_indexed.is_some()
4661 } else {
4662 key_literal_int.is_some()
4663 };
4664 if let (true, Some(i)) = (route_indexed, key_int_for_indexed) {
4665 let len = exec.array(&name).map(|a| a.len() as i64).unwrap_or(0);
4666 let idx = if i > 0 {
4667 (i - 1) as usize
4668 } else if i < 0 {
4669 let off = len + i;
4670 if off < 0 {
4671 return;
4672 }
4673 off as usize
4674 } else {
4675 // zsh: `a[0]=v` is "assignment to invalid subscript
4676 // range" (positionals/arrays are 1-based). Mirror
4677 // the diagnostic and abort with status 1.
4678 eprintln!("zshrs:1: {}: assignment to invalid subscript range", name);
4679 std::process::exit(1);
4680 };
4681 // Read paramtab-first, mutate, write back via
4682 // canonical set_array so the assignment is visible
4683 // to both the legacy cache and paramtab.
4684 let mut arr = exec.array(&name).unwrap_or_default();
4685 while arr.len() <= idx {
4686 arr.push(String::new());
4687 }
4688 arr[idx] = value;
4689 exec.set_array(name, arr);
4690 return;
4691 }
4692 // Default: assoc set.
4693 exec.unset_scalar(&name);
4694 let mut map = exec.assoc(&name).unwrap_or_default();
4695 map.insert(key, value);
4696 exec.set_assoc(name, map);
4697 });
4698 Value::Status(0)
4699 });
4700
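// NOTE: the registration immediately below is BUILTIN_WORD_SPLIT (IFS field
// splitting). Brace expansion is BUILTIN_BRACE_EXPAND, registered right
// after it; that handler calls crate::ported::glob::xpandbraces and returns
// a scalar for a single result, otherwise a Value::Array.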
4703 vm.register_builtin(BUILTIN_WORD_SPLIT, |vm, _argc| {
4704 let s = vm.pop().to_str();
4705 let ifs = with_executor(|exec| {
4706 exec.scalar("IFS")
4707 .unwrap_or_else(|| " \t\n".to_string())
4708 });
4709 // Direct port of multsub's IFS-split path (src/zsh/Src/subst.c:
4710 // 567-680). zsh distinguishes WHITESPACE IFS (default) from
4711 // NON-WHITESPACE IFS:
4712 // - whitespace IFS chars (space/tab/newline): runs of separator
4713 // collapse and empty fields are SUPPRESSED
4714 // - non-whitespace IFS chars: every separator boundary creates a
4715 // field, including empties between adjacent separators
4716 // Mixed IFS treats whitespace runs as collapsing, but a single
4717 // non-whitespace IFS character creates a field boundary regardless.
4718 // zsh's default IFS is " \t\n\0" (space, tab, newline, NUL).
4719 // Treat NUL as whitespace-class so the default-IFS path
4720 // collapses runs and suppresses empties; without this the
4721 // NUL char triggered the non-whitespace branch and emitted
4722 // empty fields between every separator.
4723 let only_ws = ifs.chars().all(|c| matches!(c, ' ' | '\t' | '\n' | '\0'));
4724 let parts: Vec<fusevm::Value> = if only_ws {
4725 s.split(|c: char| ifs.contains(c))
4726 .filter(|p| !p.is_empty())
4727 .map(fusevm::Value::str)
4728 .collect()
4729 } else {
4730 // Non-whitespace IFS: preserve every separator boundary,
4731 // including empty fields. Matches zsh's behaviour for
4732 // `IFS=:; ${=a}` on `x:y::z` -> [x, y, "", z].
4733 s.split(|c: char| ifs.contains(c))
4734 .map(fusevm::Value::str)
4735 .collect()
4736 };
4737 // zsh: word-splitting an empty value yields ZERO words, not one
4738 // empty word. `unset b; for w in ${=b}` iterates zero times.
4739 // Whitespace-IFS path filtered out the empties already; the
4740 // non-whitespace path may have produced a single-empty Vec from
4741 // `"".split(...)` which still iterates once — collapse to an
4742 // empty Array so for-loops and arg expansion see no words.
4743 if parts.is_empty() || (parts.len() == 1 && parts[0].to_str().is_empty()) {
4744 fusevm::Value::Array(Vec::new())
4745 } else if parts.len() == 1 {
4746 parts.into_iter().next().unwrap()
4747 } else {
4748 fusevm::Value::Array(parts)
4749 }
4750 });
4751
4752 vm.register_builtin(BUILTIN_BRACE_EXPAND, |vm, _argc| {
4753 let s = vm.pop().to_str();
4754 // Direct call to the canonical brace expander (port of
4755 // Src/glob.c::xpandbraces at glob.rs:1678). Was stubbed
4756 // as `vec![s]` — every `print X{1,2,3}Y` returned literal.
4757 let brace_ccl = with_executor(|exec|
4758 crate::ported::options::opt_state_get("braceccl").unwrap_or(false));
4759 let parts = crate::ported::glob::xpandbraces(&s, brace_ccl);
4760 if parts.len() == 1 {
4761 fusevm::Value::str(parts.into_iter().next().unwrap_or_default())
4762 } else {
4763 fusevm::Value::Array(parts.into_iter().map(fusevm::Value::str).collect())
4764 }
4765 });
4766
4767 // `[[ s =~ pat ]]` regex match — extra-builtin fallback path so the
4768 // conditional grammar can route here when Op::RegexMatch isn't wired.
4769 // Uses the same regex cache as the host method.
4770 vm.register_builtin(BUILTIN_REGEX_MATCH, |vm, _argc| {
4771 let pat = vm.pop().to_str();
4772 let s = vm.pop().to_str();
4773 // Same untokenize before regex compile as ZshrsHost::regex_match
4774 // — Snull/DQ markers from quoted patterns must be stripped
4775 // before the regex engine sees them. Direct port of
4776 // bin_test/cond_match's untokenize() call.
4777 let pat = crate::lex::untokenize(&pat);
4778 let s = crate::lex::untokenize(&s);
4779 let mut cache = REGEX_CACHE.lock();
4780 let matched = if let Some(re) = cache.get(&pat) {
4781 re.is_match(&s)
4782 } else {
4783 match regex::Regex::new(&pat) {
4784 Ok(re) => {
4785 let m = re.is_match(&s);
4786 cache.insert(pat.clone(), re);
4787 m
4788 }
4789 Err(_) => false,
4790 }
4791 };
4792 if matched {
4793 Value::Status(0)
4794 } else {
4795 Value::Status(1)
4796 }
4797 });
4798
// `*(qual)` glob qualifier filter (BUILTIN_GLOB_QUALIFIED, registered below
// after BUILTIN_GLOB_EXPAND). Stack: [pattern, qualifier].
// Pattern is glob-expanded normally, then each result is filtered by the
// qualifier predicate. Common qualifiers:
4802 // . — regular files only
4803 // / — directories only
4804 // @ — symlinks
4805 // x — executable
4806 // r/w/x — readable/writable/executable
4807 // N — nullglob (no error if no match)
4808 // L+N / L-N — size > N / size < N (in bytes)
4809 // mh-N / mh+N — modified within N hours / older than N hours
4810 // md-N / md+N — modified within N days / older than N days
4811 // on/On — sort by name asc/desc (default)
4812 // oL/OL — sort by length
4813 // om/Om — sort by mtime
4814 // Pop a scalar pattern, run expand_glob, push Value::Array. Used
4815 // by the segment-concat compile path for `$D/*`-style words.
4816 vm.register_builtin(BUILTIN_GLOB_EXPAND, |vm, _argc| {
4817 let pattern = vm.pop().to_str();
4818 let matches = with_executor(|exec| exec.expand_glob(&pattern));
4819 if matches.is_empty() {
4820 // expand_glob handles NOMATCH internally; if it returns
4821 // empty here, nullglob was on. Yield empty array.
4822 return fusevm::Value::Array(Vec::new());
4823 }
4824 if matches.len() == 1 && matches[0] == pattern {
4825 // No real matches; expand_glob returned the literal. Pass
4826 // back as scalar so downstream ops don't re-flatten.
4827 return fusevm::Value::str(pattern);
4828 }
4829 fusevm::Value::Array(matches.into_iter().map(fusevm::Value::str).collect())
4830 });
4831
4832 vm.register_builtin(BUILTIN_GLOB_QUALIFIED, |vm, _argc| {
4833 let qual = vm.pop().to_str();
4834 let pattern = vm.pop().to_str();
4835 let nullglob = qual.contains('N');
4836 let mut matches = with_executor(|exec| exec.expand_glob(&pattern));
4837 if matches.is_empty() && !nullglob {
4838 // Default: keep the unmatched pattern (zsh's default unless N is set)
4839 return fusevm::Value::Array(vec![fusevm::Value::str(pattern)]);
4840 }
4841 // Filter by predicates that require stat
4842 matches.retain(|path| {
4843 // zsh's `-` modifier in glob qualifiers (`*(-.)`) means
4844 // "follow symlinks before applying the test". Without
4845 // `-`, `(.)` uses lstat (skipping symlinks even when
4846 // they target a regular file). Direct port of zsh's
4847 // pattern.c qualifier parser — the QUAL_NULL bit is set
4848 // by `-` and switches stat→lstat-vs-stat. Default Rust
4849 // `fs::metadata` follows symlinks; use `symlink_metadata`
4850 // by default, switch to `metadata` when `-` is in the
4851 // qualifier set.
4852 let follow_symlinks = qual.contains('-');
4853 let meta_res = if follow_symlinks {
4854 fs::metadata(path)
4855 } else {
4856 fs::symlink_metadata(path)
4857 };
4858 let meta = match meta_res {
4859 Ok(m) => m,
4860 Err(_) => return qual.contains('N'),
4861 };
4862 let mut keep = true;
4863 for c in qual.chars() {
4864 match c {
4865 '.' => keep &= meta.is_file(),
4866 '/' => keep &= meta.is_dir(),
4867 '@' => {
4868 // is_symlink requires fs::symlink_metadata for the
4869 // path itself, not the target.
4870 keep &= fs::symlink_metadata(path)
4871 .map(|m| m.file_type().is_symlink())
4872 .unwrap_or(false);
4873 }
4874 'x' => {
4875 keep &= meta.permissions().mode() & 0o111 != 0;
4876 }
4877 'r' => {
4878 keep &= meta.permissions().mode() & 0o444 != 0;
4879 }
4880 'w' => {
4881 keep &= meta.permissions().mode() & 0o222 != 0;
4882 }
4883 _ => {}
4884 }
4885 if !keep {
4886 break;
4887 }
4888 }
4889 keep
4890 });
4891 // Sort modifiers
4892 if qual.contains("on") || qual.contains('o') && !qual.contains("om") && !qual.contains("oL")
4893 {
4894 matches.sort();
4895 }
4896 if qual.contains("On")
4897 || (qual.contains('O') && !qual.contains("Om") && !qual.contains("OL"))
4898 {
4899 matches.sort();
4900 matches.reverse();
4901 }
4902 if qual.contains("oL") {
4903 matches.sort_by_key(|p| std::fs::metadata(p).map(|m| m.len()).unwrap_or(0));
4904 }
4905 if qual.contains("OL") {
4906 matches.sort_by_key(|p| {
4907 std::cmp::Reverse(std::fs::metadata(p).map(|m| m.len()).unwrap_or(0))
4908 });
4909 }
4910 if qual.contains("om") {
4911 matches.sort_by_key(|p| {
4912 std::fs::metadata(p)
4913 .and_then(|m| m.modified())
4914 .map(|t| {
4915 std::cmp::Reverse(
4916 t.duration_since(std::time::UNIX_EPOCH)
4917 .map(|d| d.as_secs())
4918 .unwrap_or(0),
4919 )
4920 })
4921 .unwrap_or(std::cmp::Reverse(0))
4922 });
4923 }
4924 if qual.contains("Om") {
4925 matches.sort_by_key(|p| {
4926 std::fs::metadata(p)
4927 .and_then(|m| m.modified())
4928 .map(|t| {
4929 t.duration_since(std::time::UNIX_EPOCH)
4930 .map(|d| d.as_secs())
4931 .unwrap_or(0)
4932 })
4933 .unwrap_or(0)
4934 });
4935 }
4936 // (M) mark-dirs / (T) list-types qualifiers — direct port of
4937 // zsh/Src/glob.c:1557-1566 (case 'M' / case 'T'). zsh appends
4938 // a single char to each output (or only to dirs for `M`):
4939 // / directory * executable regular file
4940 // @ symlink | fifo
4941 // = socket # block device % char device
4942 //
4943 // M alone marks ONLY directories with `/`; T marks every
4944 // file with its type char. Both sourced from glob.c:355,372
4945 // emit-side logic on gf_markdirs / gf_listtypes flags.
4946 let mark_dirs = qual.contains('M');
4947 let list_types = qual.contains('T');
4948 if mark_dirs || list_types {
4949 matches = matches
4950 .into_iter()
4951 .map(|p| {
4952 let meta = match std::fs::symlink_metadata(&p) {
4953 Ok(m) => m,
4954 Err(_) => return p,
4955 };
4956 let mode = meta.permissions().mode();
4957 let ch = crate::glob::file_type(mode);
4958 if list_types || (mark_dirs && ch == '/') {
4959 format!("{}{}", p, ch)
4960 } else {
4961 p
4962 }
4963 })
4964 .collect();
4965 }
4966 fusevm::Value::Array(matches.into_iter().map(fusevm::Value::str).collect())
4967 });
4968
4969 // `break`/`continue` from a sub-VM body. The compile path emits these
4970 // when the keyword appears at chunk top-level (no enclosing for/while in
4971 // the current chunk's patch lists). Outer-loop builtins (BUILTIN_RUN_
4972 // SELECT and any future loop-via-builtin construct) drain
4973 // executor.loop_signal after each iteration.
4974 vm.register_builtin(BUILTIN_SET_BREAK, |_vm, _argc| {
4975 with_executor(|exec| {
4976 exec.loop_signal = Some(LoopSignal::Break);
4977 });
4978 Value::Status(0)
4979 });
4980 vm.register_builtin(BUILTIN_SET_CONTINUE, |_vm, _argc| {
4981 with_executor(|exec| {
4982 exec.loop_signal = Some(LoopSignal::Continue);
4983 });
4984 Value::Status(0)
4985 });
4986
4987 // `m[k]+=tail` — append onto the existing value (string concat). Mirrors
4988 // zsh's += behavior on assoc-array entries. Missing key creates it with
4989 // just `tail`, matching SET_ASSOC's create-on-demand.
4990 vm.register_builtin(BUILTIN_APPEND_ASSOC, |vm, _argc| {
4991 let tail = vm.pop().to_str();
4992 let key = vm.pop().to_str();
4993 let name = vm.pop().to_str();
4994 with_executor(|exec| {
4995 exec.unset_scalar(&name);
4996 let mut map = exec.assoc(&name).unwrap_or_default();
4997 match map.get_mut(&key) {
4998 Some(existing) => existing.push_str(&tail),
4999 None => {
5000 map.insert(key.clone(), tail.clone());
5001 }
5002 }
5003 exec.set_assoc(name.clone(), map);
5004 // PFA-SMR aspect: assoc subscript-append `m[k]+=tail`.
5005 // Recorder emits a structured assoc event with the
5006 // POST-append value so replay reconstructs end state
5007 // directly (no need to model the +=tail concat).
5008 #[cfg(feature = "recorder")]
5009 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
5010 let ctx = exec.recorder_ctx();
5011 let attrs = exec.recorder_attrs_for(&name);
5012 let new_val = exec
5013 .assoc(&name)
5014 .and_then(|m| m.get(&key).cloned())
5015 .unwrap_or_default();
5016 crate::recorder::emit_assoc_assign(
5017 &name,
5018 vec![(key.clone(), new_val)],
5019 attrs,
5020 true,
5021 ctx,
5022 );
5023 }
5024 });
5025 Value::Status(0)
5026 });
5027
5028 vm.register_builtin(BUILTIN_ARRAY_LENGTH, |vm, _argc| {
5029 let name = vm.pop().to_str();
5030 let len = with_executor(|exec| exec.array(&name).map(|a| a.len()).unwrap_or(0));
5031 Value::str(len.to_string())
5032 });
5033
// `${arr[*]}` (BUILTIN_ARRAY_JOIN_STAR, registered below after
// BUILTIN_SET_RAW_OPT) — join array elements with the first IFS char into
// a single string. Matches zsh: in DQ context this preserves the
// join; in array context too the result is one Value::Str.
5037 // Set or clear a shell option directly. Used by `noglob CMD ...`
5038 // precommand wrapping — the compiler emits SET_RAW_OPT to flip the
5039 // option ON before compiling the inner words and OFF after, so glob
5040 // expansion of the inner args sees the temporary state.
5041 vm.register_builtin(BUILTIN_SET_RAW_OPT, |vm, _argc| {
5042 let on = vm.pop().to_int() != 0;
5043 let opt = vm.pop().to_str();
5044 with_executor(|exec| {
5045 if on {
5046 crate::ported::options::opt_state_set(&opt, true);
5047 } else {
5048 crate::ported::options::opt_state_unset(&opt);
5049 }
5050 });
5051 Value::Status(0)
5052 });
5053
5054 vm.register_builtin(BUILTIN_ARRAY_JOIN_STAR, |vm, _argc| {
5055 let name = vm.pop().to_str();
5056 let result = with_executor(|exec| {
5057 let sep = exec
5058 .scalar("IFS")
5059 .and_then(|s| s.chars().next())
5060 .map(|c| c.to_string())
5061 .unwrap_or_else(|| " ".to_string());
5062 if name == "@" || name == "*" || name == "argv" {
5063 return exec.pparams().join(&sep);
5064 }
5065 if let Some(arr) = exec.array(&name) {
5066 arr.join(&sep)
5067 } else {
5068 exec.get_variable(&name)
5069 }
5070 });
5071 fusevm::Value::str(result)
5072 });
5073
5074 vm.register_builtin(BUILTIN_ARRAY_ALL, |vm, _argc| {
5075 let name = vm.pop().to_str();
5076 with_executor(|exec| {
5077 // Special positional names — splice the positional list.
5078 if name == "@" || name == "*" || name == "argv" {
5079 return Value::Array(exec.pparams().iter().map(Value::str).collect());
5080 }
5081 match exec.array(&name) {
5082 Some(v) => Value::Array(v.iter().map(Value::str).collect()),
5083 None => {
5084 // Fall back to scalar lookup. zsh (unlike bash)
5085 // does NOT IFS-split a scalar variable in a for
5086 // list — `for w in $scalar` iterates ONCE with the
5087 // scalar value. Word-splitting requires either
5088 // sh_word_split option or explicit `${(s.,.)scalar}`.
5089 let val = exec.get_variable(&name);
5090 if val.is_empty()
5091 && !exec.has_scalar(&name)
5092 && std::env::var(&name).is_err()
5093 {
5094 Value::Array(vec![])
5095 } else if crate::ported::options::opt_state_get("shwordsplit").unwrap_or(false) {
5096 // bash-compat: under setopt sh_word_split, do
5097 // split scalars on IFS chars.
5098 let ifs = exec
5099 .scalar("IFS")
5100 .unwrap_or_else(|| " \t\n".to_string());
5101 let parts: Vec<Value> = val
5102 .split(|c: char| ifs.contains(c))
5103 .filter(|s| !s.is_empty())
5104 .map(Value::str)
5105 .collect();
5106 Value::Array(parts)
5107 } else {
5108 Value::Array(vec![Value::str(val)])
5109 }
5110 }
5111 }
5112 })
5113 });
5114
// BUILTIN_ARRAY_FLATTEN(N) (registered below, after BUILTIN_RUN_COPROC):
// pops N values, flattens one level of Array nesting, pushes the resulting
// Array AND its length as a separate Int.
// The two-value return shape lets the caller (for-loop compile path)
// SetSlot the length before SetSlot'ing the array, without re-deriving
// the length from the array via a second builtin call.
5120 // `coproc [name] { body }` — bidirectional pipe to backgrounded body.
5121 // Stack discipline (top first): [name (str, "" for default), sub_idx (int)].
5122 // On success: parent's `executor.arrays[name]` becomes [write_fd, read_fd]
5123 // and Status(0) is returned. The caller writes to the child's stdin via
5124 // write_fd, reads its stdout via read_fd, and closes both when done.
5125 //
5126 // Bash's coproc convention is `${NAME[0]}` = read_fd, `${NAME[1]}` =
5127 // write_fd. We follow that: arrays[name] = [read_fd_str, write_fd_str].
5128 vm.register_builtin(BUILTIN_RUN_COPROC, |vm, _argc| {
5129 let sub_idx = vm.pop().to_int() as usize;
5130 let raw_name = vm.pop().to_str();
5131 let name = if raw_name.is_empty() {
5132 "COPROC".to_string()
5133 } else {
5134 raw_name
5135 };
5136 let chunk = match vm.chunk.sub_chunks.get(sub_idx).cloned() {
5137 Some(c) => c,
5138 None => return Value::Status(1),
5139 };
5140
5141 // (parent_read ← child_stdout)
5142 let mut p2c = [0i32; 2]; // parent writes, child reads
5143 let mut c2p = [0i32; 2]; // child writes, parent reads
5144 if unsafe { libc::pipe(p2c.as_mut_ptr()) } < 0 {
5145 return Value::Status(1);
5146 }
5147 if unsafe { libc::pipe(c2p.as_mut_ptr()) } < 0 {
5148 unsafe {
5149 libc::close(p2c[0]);
5150 libc::close(p2c[1]);
5151 }
5152 return Value::Status(1);
5153 }
5154
5155 match unsafe { libc::fork() } {
5156 -1 => {
5157 unsafe {
5158 libc::close(p2c[0]);
5159 libc::close(p2c[1]);
5160 libc::close(c2p[0]);
5161 libc::close(c2p[1]);
5162 }
5163 Value::Status(1)
5164 }
5165 0 => {
5166 // Child: stdin from p2c[0], stdout to c2p[1]. Close all
5167 // unused fds. setsid so SIGINT to fg doesn't hit us.
5168 unsafe {
5169 libc::dup2(p2c[0], libc::STDIN_FILENO);
5170 libc::dup2(c2p[1], libc::STDOUT_FILENO);
5171 libc::close(p2c[0]);
5172 libc::close(p2c[1]);
5173 libc::close(c2p[0]);
5174 libc::close(c2p[1]);
5175 libc::setsid();
5176 }
5177 crate::fusevm_disasm::maybe_print_stdout("coproc:child", &chunk);
5178 let mut co_vm = fusevm::VM::new(chunk);
5179 register_builtins(&mut co_vm);
5180 let _ = co_vm.run();
5181 let _ = std::io::stdout().flush();
5182 let _ = std::io::stderr().flush();
5183 std::process::exit(co_vm.last_status);
5184 }
5185 _pid => {
5186 // Parent: close child ends, store [read_fd, write_fd] in NAME.
5187 unsafe {
5188 libc::close(p2c[0]);
5189 libc::close(c2p[1]);
5190 }
5191 let read_fd = c2p[0];
5192 let write_fd = p2c[1];
5193 with_executor(|exec| {
5194 exec.unset_scalar(&name);
5195 exec.set_array(name, vec![read_fd.to_string(), write_fd.to_string()]);
5196 });
5197 Value::Status(0)
5198 }
5199 }
5200 });
5201
5202 vm.register_builtin(BUILTIN_ARRAY_FLATTEN, |vm, argc| {
5203 let n = argc as usize;
5204 let start = vm.stack.len().saturating_sub(n);
5205 let raw: Vec<fusevm::Value> = vm.stack.drain(start..).collect();
5206 let mut flat: Vec<fusevm::Value> = Vec::with_capacity(raw.len());
5207 for v in raw {
5208 match v {
5209 fusevm::Value::Array(items) => flat.extend(items),
5210 other => flat.push(other),
5211 }
5212 }
5213 let len = flat.len() as i64;
5214 // Push the array first; the Int(len) becomes the builtin's return
5215 // value (which CallBuiltin already pushes). Caller consumes in
5216 // reverse: SetSlot(len_slot) pops Int, SetSlot(arr_slot) pops Array.
5217 vm.push(fusevm::Value::Array(flat));
5218 fusevm::Value::Int(len)
5219 });
5220
5221 // Shell variable get/set — routes through executor.variables so nested
5222 // VMs (function calls) and tree-walker callers see the same storage.
5223 vm.register_builtin(BUILTIN_GET_VAR, |vm, argc| {
5224 let args = pop_args(vm, argc);
5225 let name = args.into_iter().next().unwrap_or_default();
5226 let live_status = vm.last_status;
5227 // `$@` and `$*` need splice semantics — return Value::Array of
5228 // positional params so for-loop's BUILTIN_ARRAY_FLATTEN spreads them
5229 // and pop_args splits them into argv slots. zsh's `"$@"` bslashquote-each-
5230 // word semantics matches: each pos-param becomes its own arg.
5231 // Same for arrays accessed by name (e.g. `$arr` in some contexts).
5232 let sync_status = |exec: &mut ShellExecutor| {
5233 exec.set_last_status(live_status);
5234 };
5235 if name == "@" || name == "*" {
5236 return with_executor(|exec| {
5237 sync_status(exec);
5238 fusevm::Value::Array(
5239 exec.pparams()
5240 .iter()
5241 .map(fusevm::Value::str)
5242 .collect(),
5243 )
5244 });
5245 }
5246 // RC_EXPAND_PARAM: when the option is set and `name` refers to
5247 // an array, return Value::Array so the enclosing word's
5248 // BUILTIN_CONCAT_DISTRIBUTE distributes element-wise. Without
5249 // the option, arrays still join to a space-separated scalar
5250 // (zsh's default unquoted-array-as-scalar semantics).
5251 let rc_expand =
5252 with_executor(|exec| crate::ported::options::opt_state_get("rcexpandparam").unwrap_or(false));
5253 if rc_expand {
5254 let arr_val = with_executor(|exec| {
5255 sync_status(exec);
5256 exec.array(&name)
5257 });
5258 if let Some(arr) = arr_val {
5259 return fusevm::Value::Array(arr.into_iter().map(fusevm::Value::str).collect());
5260 }
5261 }
5262 // Magic-assoc fallback FIRST — `${aliases}` / `${functions}`
5263 // / `${commands}` / etc. should return the value list per
5264 // zsh's bare-assoc semantics. Without this, those names fell
5265 // through to `get_variable` which is empty (they live in
5266 // separate executor tables, not `assoc_arrays`). Return as
5267 // a Value::Array so `arr=(${aliases})` distributes into
5268 // multiple elements, matching zsh's array-context word
5269 // splitting for assoc-bare references.
5270 let magic_vals = with_executor(|exec| {
5271 sync_status(exec);
5272 crate::exec::scan_magic_assoc_keys(&name).map(|keys| {
5273 keys.iter()
5274 .map(|k| exec.get_special_array_value(&name, k).unwrap_or_default())
5275 .collect::<Vec<_>>()
5276 })
5277 });
5278 if let Some(vals) = magic_vals {
5279 // Distinguish "name IS a magic-assoc with no entries"
5280 // (return Array(empty)) from "name is unknown — fall
5281 // through to get_variable".
5282 return fusevm::Value::Array(vals.into_iter().map(fusevm::Value::str).collect());
5283 }
5284 // Indexed-array path: return Value::Array so pop_args splats
5285 // each element into its own argv slot. Direct port of zsh's
5286 // unquoted `$arr` semantics — each element becomes a separate
5287 // word in command-arg position.
5288 //
5289 // DQ context exception: inside `"...$arr..."`, zsh joins with
5290 // the first char of $IFS (default space) so the DQ word stays
5291 // a single argv slot. Detect via in_dq_context (bumped by
5292 // BUILTIN_EXPAND_TEXT mode 1) and return the joined scalar.
5293 // Direct port of Src/subst.c:1759-1813 nojoin/sepjoin: in DQ
5294 // (qt=1) without explicit `(@)`, sepjoin runs and the result
5295 // is one word.
5296 let arr_assoc_data = with_executor(|exec| {
5297 sync_status(exec);
5298 let in_dq = exec.in_dq_context > 0;
5299 // KSH_ARRAYS: bare `$arr` returns ONLY arr[0] (zero-
5300 // based first-element-only semantics). Direct port of
5301 // Src/params.c getstrvalue's KSH_ARRAYS gate which
5302 // returns aval[0] instead of the whole array.
5303 let ksh_arrays = crate::ported::options::opt_state_get("ksharrays").unwrap_or(false);
5304 if let Some(arr) = exec.array(&name) {
5305 if ksh_arrays {
5306 return Some((vec![arr.first().cloned().unwrap_or_default()], in_dq));
5307 }
5308 return Some((arr.clone(), in_dq));
5309 }
5310 if let Some(map) = exec.assoc(&name) {
5311 let mut keys: Vec<&String> = map.keys().collect();
5312 keys.sort();
5313 let values: Vec<String> = keys
5314 .iter()
5315 .filter_map(|k| map.get(*k).cloned())
5316 .collect();
5317 if ksh_arrays {
5318 return Some((vec![values.into_iter().next().unwrap_or_default()], in_dq));
5319 }
5320 return Some((values, in_dq));
5321 }
5322 None
5323 });
5324 if let Some((items, in_dq)) = arr_assoc_data {
5325 if in_dq {
5326 let sep = with_executor(|exec| {
5327 exec.get_variable("IFS")
5328 .chars()
5329 .next()
5330 .map(|c| c.to_string())
5331 .unwrap_or_else(|| " ".to_string())
5332 });
5333 return Value::str(items.join(&sep));
5334 }
5335 return fusevm::Value::Array(items.into_iter().map(fusevm::Value::str).collect());
5336 }
5337 let (val, in_dq) = with_executor(|exec| {
5338 sync_status(exec);
5339 (exec.get_variable(&name), exec.in_dq_context > 0)
5340 });
5341 // Empty unquoted scalar → drop the arg (zsh "remove empty
5342 // unquoted words" rule). Returning empty Value::Array makes
5343 // pop_args contribute zero items. DQ context keeps the empty
5344 // string so "$a" stays a single empty arg. Direct port of
5345 // subst.c's elide-empty pass.
5346 if val.is_empty() && !in_dq {
5347 return fusevm::Value::Array(Vec::new());
5348 }
5349 Value::str(val)
5350 });
5351
5352 // `name+=val` (no parens) — runtime dispatch:
5353 // - if `name` is in `arrays` → push `val` as new element
5354 // - if `name` is in `assoc_arrays` → refuse (zsh errors here)
5355 // - else → scalar concat (existing behavior)
5356 // Stack: [name, value].
5357 vm.register_builtin(BUILTIN_APPEND_SCALAR_OR_PUSH, |vm, argc| {
5358 let args = pop_args(vm, argc);
5359 let mut iter = args.into_iter();
5360 let name = iter.next().unwrap_or_default();
5361 let value = iter.next().unwrap_or_default();
5362 with_executor(|exec| {
5363 if let Some(mut arr) = exec.array(&name) {
5364 arr.push(value.clone());
5365 exec.set_array(name.clone(), arr);
5366 // PFA-SMR aspect: `name+=elem` array push (scalar form
5367 // resolved to existing indexed array). is_append=true.
5368 #[cfg(feature = "recorder")]
5369 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
5370 let ctx = exec.recorder_ctx();
5371 let attrs = exec.recorder_attrs_for(&name);
5372 emit_path_or_assign(&name, std::slice::from_ref(&value), attrs, true, &ctx);
5373 }
5374 return;
5375 }
5376 if exec.assoc(&name).is_some() {
5377 eprintln!("zshrs: {}: cannot use += on assoc without (key val)", name);
5378 return;
5379 }
5380 // typeset -i: `+=` is arithmetic add, not string concat.
5381 // `typeset -i x=42; x+=8` must store 50, not "428". Per
5382 // Src/params.c assignsparam:3270-3293, the PM_TYPE switch
5383 // routes integer/float through matheval. Read PM_INTEGER
5384 // from the canonical Param flags.
5385 let is_integer = exec.is_integer_param(&name);
5386 if is_integer {
5387 let prev = exec.get_variable(&name);
5388 let prev_n: i64 = prev.parse().unwrap_or(0);
5389 let added = crate::ported::math::mathevali(&crate::ported::subst::singsub(&value)).unwrap_or(0);
5390 let new_val = (prev_n + added).to_string();
5391 exec.set_scalar(name.clone(), new_val.clone());
5392 // PFA-SMR aspect: integer-typed append. The append
5393 // operator is arithmetic; replay should restore the
5394 // POST-add value so the bundle reflects end state.
5395 #[cfg(feature = "recorder")]
5396 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
5397 let ctx = exec.recorder_ctx();
5398 let attrs = exec.recorder_attrs_for(&name);
5399 crate::recorder::emit_assign_typed(&name, &new_val, attrs, ctx);
5400 }
5401 return;
5402 }
5403 // Scalar concat.
5404 let prev = exec.get_variable(&name);
5405 let combined = format!("{}{}", prev, value);
5406 exec.set_scalar(name.clone(), combined.clone());
5407 // PFA-SMR aspect: scalar concat (`PATH+=":/foo"` and any
5408 // other `NAME+=tail` shape). For PATH-family scalars the
5409 // path-or-assign helper still emits a path_mod with the
5410 // FULL post-concat value so replay knows the end state.
5411 #[cfg(feature = "recorder")]
5412 if crate::recorder::is_enabled() && exec.local_scope_depth == 0 {
5413 let ctx = exec.recorder_ctx();
5414 let attrs = exec.recorder_attrs_for(&name);
5415 let lower = name.to_ascii_lowercase();
5416 if matches!(
5417 lower.as_str(),
5418 "path" | "fpath" | "manpath" | "module_path" | "cdpath"
5419 ) {
5420 emit_path_or_assign(
5421 &name,
5422 std::slice::from_ref(&combined),
5423 attrs,
5424 true,
5425 &ctx,
5426 );
5427 } else {
5428 crate::recorder::emit_assign_typed(&name, &combined, attrs, ctx);
5429 }
5430 }
5431 });
5432 Value::Status(0)
5433 });
5434
5435 vm.register_builtin(BUILTIN_SET_VAR, |vm, argc| {
5436 let args = pop_args(vm, argc);
5437 let mut iter = args.into_iter();
5438 let name = iter.next().unwrap_or_default();
5439 let value = iter.next().unwrap_or_default();
5440 let blocked = with_executor(|exec| {
5441 // zsh has a fixed set of intrinsic read-only specials that
5442 // can never be assigned to from script. This is a hard
5443 // wired list (params.c `ROVAR` flag) — not user-settable.
5444 // NOTE: `_` is NOT readonly — zsh allows assignments to
5445 // and `unset` of it (it's just the last-arg auto-update).
5446 // ZSH_ARGZERO is also writable in zsh per Src/params.c
5447 // (uses PM_SCALAR without PM_READONLY); zinit's startup
5448 // line `ZSH_ARGZERO=$0` relies on this.
5449 let is_intrinsic_ro = matches!(
5450 name.as_str(),
5451 "PPID" | "LINENO" | "argv0" | "ARGC"
5452 );
5453 let is_ro = is_intrinsic_ro || exec.is_readonly_param(&name);
5454 if is_ro {
5455 eprintln!("zshrs:1: read-only variable: {}", name);
5456 // Mirror zsh -c: read-only assignment failure aborts
5457 // the shell with status 1, not just the command.
5458 std::process::exit(1);
5459 }
5460 // If the variable was previously declared `integer` (or
5461 // `typeset -i`), arith-evaluate the value before storing.
5462 // zsh: `integer i; i=5*3` stores 15. Mirrors C's PM_TYPE
5463 // dispatch at Src/params.c assignsparam:3270.
5464 let is_integer = exec.is_integer_param(&name);
5465 // `typeset -i N` base-formatting reads `Param.base` directly
5466 // (Src/zsh.h:1860 — int print base). Per C convfloat /
5467 // convbase in params.c, base==0 means default decimal.
5468 let int_base: Option<u32> = if is_integer {
5469 let b = crate::ported::params::paramtab().read().ok()
5470 .and_then(|t| t.get(&name).map(|pm| pm.base))
5471 .unwrap_or(0);
5472 if b > 0 { Some(b as u32) } else { None }
5473 } else {
5474 None
5475 };
5476 let stored = if is_integer && !value.is_empty() {
5477 let evaluated = crate::ported::math::mathevali(&crate::ported::subst::singsub(&value)).unwrap_or(0).to_string();
5478 if let Some(base) = int_base {
5479 evaluated
5480 .parse::<i64>()
5481 .map(|n| format_int_in_base(n, base))
5482 .unwrap_or(evaluated)
5483 } else {
5484 evaluated
5485 }
5486 } else {
5487 value.clone()
5488 };
5489 // c:Src/params.c — `typeset -l` (PM_LOWER) / `-u`
5490 // (PM_UPPER) case-fold the assigned value before storage.
5491 // Direct port of the PM_LOWER/PM_UPPER setstrvalue arms.
5492 let stored = if exec.is_uppercase_param(&name) {
5493 stored.to_uppercase()
5494 } else if exec.is_lowercase_param(&name) {
5495 stored.to_lowercase()
5496 } else {
5497 stored
5498 };
5499 // Mirror scalar→array if name is the scalar side of a
5500 // typeset -T tie. Direct port of Src/params.c PM_TIED:
5501 // assigning to PATH must update both `path` (the array
5502 // mirror) and the process env (so child execs see the
5503 // new value, and so find_in_path / external lookups
5504 // resolve correctly). Without the env::set_var step
5505 // here, `PATH=/nope; ls` continued to find ls via the
5506 // shell's startup-time env PATH.
5507 if let Some((arr_name, sep)) = exec.tied_scalar_to_array.get(&name).cloned() {
5508 let parts: Vec<String> = if stored.is_empty() {
5509 Vec::new()
5510 } else {
5511 stored.split(&sep).map(String::from).collect()
5512 };
5513 exec.set_array(arr_name, parts);
5514 std::env::set_var(&name, &stored);
5515 // Clear the command hash on PATH change so subsequent
5516 // command lookups walk the new PATH instead of
5517 // returning stale absolute paths from before the
5518 // assignment. zsh's bin_set rehashes lazily; this is
5519 // the simplest equivalent.
5520 if name == "PATH" {
5521 if let Ok(mut t) = crate::ported::hashtable::cmdnamtab_lock().write() {
5522 t.clear();
5523 }
5524 }
5525 let _ = exec; // silence unused-binding in the no-PATH branch
5526 }
5527 // zsh enforces a minimum of 1 on `HISTSIZE` — `HISTSIZE=0`
5528 // and `HISTSIZE=-5` both clamp to `1`. Mirror at storage
5529 // time so subsequent reads return the clamped value.
5530 let stored = if name == "HISTSIZE" {
5531 stored
5532 .parse::<i64>()
5533 .map(|n| n.max(1).to_string())
5534 .unwrap_or_else(|_| stored.clone())
5535 } else {
5536 stored
5537 };
5538 // If we're inside an inline-assignment frame (`X=foo cmd`
5539 // is currently exec'ing the prefix), record the previous
5540 // value so END_INLINE_ENV can restore it after the command
5541 // returns. Then export the new value to the env so the
5542 // child sees it. zsh's `X=foo cmd` semantics: shell
5543 // variable AND env entry both vanish after cmd returns.
5544 let in_inline_env = !exec.inline_env_stack.is_empty();
5545 if in_inline_env {
5546 let prev_var = crate::ported::params::getsparam(&name);
5547 let prev_env = std::env::var(&name).ok();
5548 exec.inline_env_stack
5549 .last_mut()
5550 .unwrap()
5551 .push((name.clone(), prev_var, prev_env));
5552 std::env::set_var(&name, &stored);
5553 }
5554 exec.set_scalar(name.clone(), stored.clone());
5555 // Mirror the write into paramtab (the C-port canonical
5556 // store at `Src/params.c:3350 setsparam`). Without this,
5557 // `src/ported/subst.rs::vars_get` and
5558 // `src/ported/params.rs::getsparam` see paramtab-only and
5559 // miss script-level `x=hello` assignments — heredoc body
5560 // substitution, `${x}` inside `singsub`, and any other
5561 // C-port reader that doesn't go through fusevm's typed-
5562 // variable path returns empty. paramtab IS the C-source
5563 // canonical scalar store; this mirror keeps it coherent
5564 // with the parallel `exec.variables` HashMap.
5565 crate::ported::params::setsparam(&name, &stored); // c:params.c:3350
5566 // `set -o allexport`: every assignment auto-exports the var.
5567 // zsh: `setopt allexport; a=42; env | grep ^a=` prints `a=42`.
5568 // Without this, env didn't see user-set scalars.
5569 let allexport = crate::ported::options::opt_state_get("allexport").unwrap_or(false);
5570 let already_exported = (exec.param_flags(&name) as u32 & crate::ported::zsh_h::PM_EXPORTED) != 0;
5571 if allexport || already_exported {
5572 std::env::set_var(&name, &stored);
5573 }
5574 // PFA-SMR aspect: every top-level scalar assignment
5575 // (`VAR=value`) compiles to BUILTIN_SET_VAR, so this is the
5576 // chokepoint. Skip the recorder when inside a function scope
5577 // (those are runtime locals, not config state) and skip the
5578 // intrinsic specials zsh maintains itself.
5579 #[cfg(feature = "recorder")]
5580 if crate::recorder::is_enabled()
5581 && exec.local_scope_depth == 0
5582 && !matches!(
5583 name.as_str(),
5584 "PPID" | "LINENO" | "ZSH_ARGZERO" | "argv0" | "ARGC" | "?" | "_" | "RANDOM"
5585 )
5586 {
5587 let ctx = exec.recorder_ctx();
5588 let attrs = exec.recorder_attrs_for(&name);
5589 crate::recorder::emit_assign_typed(&name, &stored, attrs, ctx);
5590 }
5591 false
5592 });
5593 if blocked {
5594 return Value::Status(1);
5595 }
5596 // Propagate cmd-subst's exit status to $?. zsh: `a=$(false);
5597 // echo $?` → 1. run_command_substitution sets last_status
5598 // before returning; we pick it up here so the assignment's
5599 // status reflects the cmd-subst result.
5600 //
5601 // CRITICAL: read `vm.last_status` (live), NOT
5602 // `exec.last_status` (stale — only synced at statement
5603 // boundaries; see the BUILTIN_RETURN handler ~line 1003).
5604 // compile_assign emits LoadInt(0) + SetStatus BEFORE the
5605 // RHS is evaluated specifically to clear the live status,
5606 // so a plain assignment (no cmd-subst) reads back 0 and a
5607 // `$(...)` value reads back the subst's exit. Reading the
5608 // stale exec field here would always propagate the previous
5609 // command's status, breaking `false; a=plain; echo $?` → 1
5610 // (should be 0).
5611 let captured = vm.last_status;
5612 Value::Status(captured)
5613 });
5614
5615 // BUILTIN_REGISTER_FUNCTION (id 282) was a legacy JSON-AST body
5616 // bridge. ZshCompiler emits BUILTIN_REGISTER_COMPILED_FN (id 305)
5617 // instead, which carries a base64 bincode of an already-compiled
5618 // Chunk. The constant + handler are removed; the ID stays reserved.
5619
5620 // Pre-compiled function registration — used by compile_zsh.rs's
5621 // FuncDef path. Stack: [name, base64-bincode-of-Chunk]. We decode
5622 // the base64, deserialize the Chunk, and store directly in
5623 // executor.functions_compiled. Bypasses the ShellCommand JSON layer.
5624 // `[[ -v name ]]` — true iff `name` is a set variable (incl. set-empty,
5625 // arrays, assoc arrays, and exported env vars). Pops one string, pushes
5626 // Bool. Matches bash's -v semantics; zsh's `(t)` flag overlaps.
5627 vm.register_builtin(BUILTIN_VAR_EXISTS, |vm, _argc| {
5628 let name = vm.pop().to_str();
5629 // `[[ -v a[N] ]]` checks element existence, not just the array.
5630 // Split on `[`, look up the array, and verify the resolved
5631 // index falls within the populated range. `[[ -v h[key] ]]`
5632 // checks an associative array key.
5633 if let Some(open) = name.find('[') {
5634 if name.ends_with(']') {
5635 let arr_name = &name[..open];
5636 let key = &name[open + 1..name.len() - 1];
5637 let exists = with_executor(|exec| {
5638 if let Some(arr) = exec.array(arr_name) {
5639 // 1-based index, supports negatives.
5640 let parsed = key.parse::<i64>().ok();
5641 if let Some(i) = parsed {
5642 let len = arr.len() as i64;
5643 let resolved = if i < 0 { len + i + 1 } else { i };
5644 return resolved >= 1 && resolved <= len;
5645 }
5646 return false;
5647 }
5648 if let Some(h) = exec.assoc(arr_name) {
5649 return h.contains_key(key);
5650 }
5651 false
5652 });
5653 return fusevm::Value::Bool(exists);
5654 }
5655 }
5656 let exists = with_executor(|exec| {
5657 // Positional parameter test: `[[ -v N ]]` for an integer N
5658 // checks whether `$N` is set — i.e. there are at least N
5659 // positional params. The digit name otherwise won't exist
5660 // in `variables` unless explicitly assigned.
5661 if !name.is_empty() && name.chars().all(|c| c.is_ascii_digit()) {
5662 if let Ok(n) = name.parse::<usize>() {
5663 if n == 0 {
5664 return exec.has_scalar("0");
5665 }
5666 return n <= exec.pparams().len();
5667 }
5668 }
5669 exec.has_scalar(&name)
5670 || exec.array(&name).is_some()
5671 || exec.assoc(&name).is_some()
5672 || std::env::var(&name).is_ok()
5673 });
5674 fusevm::Value::Bool(exists)
5675 });
5676
5677 // `time { compound; ... }` — runs the sub-chunk and prints elapsed
5678 // wall-clock time. zsh's full `time` also tracks user/system CPU via
5679 // getrusage on the *child*; we approximate via wall-time only since
5680 // the sub-chunk runs in-process (no fork). Output format matches
5681 // `time simple-cmd` (already implemented elsewhere via exectime).
5682 vm.register_builtin(BUILTIN_TIME_SUBLIST, |vm, _argc| {
5683 let sub_idx = vm.pop().to_int() as usize;
5684 let chunk_opt = vm.chunk.sub_chunks.get(sub_idx).cloned();
5685 let Some(chunk) = chunk_opt else {
5686 return Value::Status(0);
5687 };
5688 let start = Instant::now();
5689 crate::fusevm_disasm::maybe_print_stdout("time_sublist", &chunk);
5690 let mut sub_vm = fusevm::VM::new(chunk);
5691 register_builtins(&mut sub_vm);
5692 let _ = sub_vm.run();
5693 let status = sub_vm.last_status;
5694 let elapsed = start.elapsed();
5695 eprintln!(
5696 "{:.2}s user {:.2}s system {:.0}% cpu {:.3} total",
5697 elapsed.as_secs_f64() * 0.7,
5698 elapsed.as_secs_f64() * 0.1,
5699 ((elapsed.as_secs_f64() * 0.8) / elapsed.as_secs_f64() * 100.0).min(100.0),
5700 elapsed.as_secs_f64()
5701 );
5702 Value::Status(status)
5703 });
5704
5705 // `{name}>file` / `{name}<file` / `{name}>>file` — named-fd allocator.
5706 // Stack: [path, varid, op_byte]. Opens path with the appropriate mode
5707 // and stores the resulting fd number in $varid as a string. We use
5708 // a high starting fd (10+) by allocating then dup'ing — matches zsh's
5709 // "fresh fd >= 10" promise so subsequent commands don't collide on
5710 // stdin/out/err.
5711 vm.register_builtin(BUILTIN_OPEN_NAMED_FD, |vm, _argc| {
5712 let op_byte = vm.pop().to_int() as u8;
5713 let varid = vm.pop().to_str();
5714 let path = vm.pop().to_str();
5715 let path_c = match std::ffi::CString::new(path.clone()) {
5716 Ok(c) => c,
5717 Err(_) => return Value::Status(1),
5718 };
5719 let flags = match op_byte {
5720 b if b == fusevm::op::redirect_op::READ => libc::O_RDONLY,
5721 b if b == fusevm::op::redirect_op::WRITE || b == fusevm::op::redirect_op::CLOBBER => {
5722 libc::O_WRONLY | libc::O_CREAT | libc::O_TRUNC
5723 }
5724 b if b == fusevm::op::redirect_op::APPEND => {
5725 libc::O_WRONLY | libc::O_CREAT | libc::O_APPEND
5726 }
5727 b if b == fusevm::op::redirect_op::READ_WRITE => libc::O_RDWR | libc::O_CREAT,
5728 _ => return Value::Status(1),
5729 };
5730 let fd = unsafe { libc::open(path_c.as_ptr(), flags, 0o644) };
5731 if fd < 0 {
5732 return Value::Status(1);
5733 }
5734 // Re-dup to fd >= 10 so positional fds (0/1/2/etc.) stay free.
5735 let new_fd = unsafe { libc::fcntl(fd, libc::F_DUPFD_CLOEXEC, 10) };
5736 let final_fd = if new_fd >= 10 {
5737 unsafe { libc::close(fd) };
5738 new_fd
5739 } else {
5740 fd
5741 };
5742 with_executor(|exec| {
5743 exec.set_scalar(varid, final_fd.to_string());
5744 });
5745 Value::Status(0)
5746 });
5747
5748 // BUILTIN_SET_TRY_BLOCK_ERROR — capture the try-block's exit status
5749 // into $TRY_BLOCK_ERROR so the always-arm can read it.
5750 vm.register_builtin(BUILTIN_SET_TRY_BLOCK_ERROR, |vm, _argc| {
5751 let vm_status = vm.last_status;
5752 with_executor(|exec| {
5753 exec.set_scalar("TRY_BLOCK_ERROR".to_string(), vm_status.to_string());
5754 });
5755 fusevm::Value::Status(0)
5756 });
5757
5758 // BUILTIN_BEGIN_INLINE_ENV / END_INLINE_ENV — wrap an
5759 // inline-assignment-prefixed command (`X=foo Y=bar cmd`):
5760 // BEGIN pushes a save frame; SET_VAR fires for each assign and
5761 // ALSO env::set_var's the value (visible to cmd's child); the
5762 // command runs; END pops the frame and restores both shell-var
5763 // and process-env state. Direct port of zsh's addvars() →
5764 // execute_simple → restore-after-exec contract.
5765 vm.register_builtin(BUILTIN_BEGIN_INLINE_ENV, |_vm, _argc| {
5766 with_executor(|exec| {
5767 exec.inline_env_stack.push(Vec::new());
5768 });
5769 fusevm::Value::Status(0)
5770 });
5771 vm.register_builtin(BUILTIN_END_INLINE_ENV, |_vm, _argc| {
5772 with_executor(|exec| {
5773 if let Some(frame) = exec.inline_env_stack.pop() {
5774 for (name, prev_var, prev_env) in frame.into_iter().rev() {
5775 match prev_var {
5776 Some(v) => {
5777 exec.set_scalar(name.clone(), v);
5778 }
5779 None => {
5780 exec.unset_scalar(&name);
5781 }
5782 }
5783 match prev_env {
5784 Some(v) => std::env::set_var(&name, &v),
5785 None => std::env::remove_var(&name),
5786 }
5787 }
5788 }
5789 });
5790 fusevm::Value::Status(0)
5791 });
5792
5793 // BUILTIN_RESTORE_TRY_BLOCK_STATUS — emitted at the end of an
5794 // `always` arm. Per zshmisc, the exit status of the entire
5795 // `{ try } always { finally }` construct is the try-list's
5796 // status, regardless of what happens in the always-list (the
5797 // exception is `return`/`exit` inside always, which short-
5798 // circuits and the cleanup is the only thing that runs). So
5799 // restore TRY_BLOCK_ERROR unconditionally — the always-list's
5800 // exit status is discarded for the construct.
5801 vm.register_builtin(BUILTIN_RESTORE_TRY_BLOCK_STATUS, |_vm, _argc| {
5802 let try_status = with_executor(|exec| {
5803 exec.scalar("TRY_BLOCK_ERROR")
5804 .and_then(|s| s.parse::<i32>().ok())
5805 .unwrap_or(0)
5806 });
5807 fusevm::Value::Status(try_status)
5808 });
5809
5810 vm.register_builtin(BUILTIN_UNKNOWN_COND, |vm, _argc| {
5811 // Unused — the diagnostic is emitted at compile time
5812 // (BUILTIN dispatch wasn't reliably firing for this path).
5813 // Kept registered as a no-op placeholder.
5814 let _ = vm.pop();
5815 fusevm::Value::Bool(false)
5816 });
5817
5818 vm.register_builtin(BUILTIN_IS_TTY, |vm, _argc| {
5819 let fd_str = vm.pop().to_str();
5820 let fd: i32 = fd_str.trim().parse().unwrap_or(-1);
5821 let is_tty = if fd < 0 {
5822 false
5823 } else {
5824 unsafe { libc::isatty(fd) != 0 }
5825 };
5826 fusevm::Value::Bool(is_tty)
5827 });
5828
5829 // Set $LINENO before executing the next statement. Direct
5830 // port of zsh's `lineno` global tracking from Src/input.c
5831 // (`if ((inbufflags & INP_LINENO) || !strin) && c == '\n')
5832 // lineno++;`). The compiler emits one of these before each
5833 // top-level pipe in `compile_sublist`, carrying the line
5834 // number captured by the parser at `ZshPipe.lineno`. Pops
5835 // [n], updates `$LINENO` in the variable table.
5836 vm.register_builtin(BUILTIN_SET_LINENO, |vm, _argc| {
5837 let n = vm.pop().to_int();
5838 with_executor(|exec| {
5839 exec.set_scalar("LINENO".to_string(), n.to_string());
5840 });
5841 // Mirror to the file-static `lineno` (utils.c:121) that
5842 // zerrmsg reads at utils.c:301 for the `:N: msg` prefix.
5843 crate::ported::utils::set_lineno(n as i32);
5844 fusevm::Value::Status(0)
5845 });
5846
5847 // Direct port of Src/prompt.c:1623 cmdpush. Token is a `CS_*`
5848 // value (zsh.h:2775-2806) emitted by compile_zsh around each
5849 // compound command (if/while/[[…]]/((…))/$(…)) and consumed by
5850 // `%_` in PS4 / prompt expansion.
5851 vm.register_builtin(BUILTIN_CMD_PUSH, |vm, _argc| {
5852 let token = vm.pop().to_int() as u8;
5853 // Route through canonical cmdpush (Src/prompt.c:1623). The
5854 // prompt expander reads from the file-static `CMDSTACK` at
5855 // `prompt.rs:2006`, not `exec.cmd_stack` — without this,
5856 // `%_` in PS4 saw an empty stack during xtrace.
5857 if (token as i32) < crate::ported::zsh_h::CS_COUNT {
5858 crate::ported::prompt::cmdpush(token);
5859 }
5860 // Canonical `cmdpush()` above already mirrors into the
5861 // `prompt::CMDSTACK` thread_local (Src/prompt.c:1620). The
5862 // legacy `exec.cmd_stack` mirror is gone.
5863 let _ = token;
5864 fusevm::Value::Status(0)
5865 });
5866
5867 // Direct port of Src/prompt.c:1631 cmdpop.
5868 vm.register_builtin(BUILTIN_CMD_POP, |_vm, _argc| {
5869 crate::ported::prompt::cmdpop();
5870 fusevm::Value::Status(0)
5871 });
5872
5873 vm.register_builtin(BUILTIN_OPTION_SET, |vm, _argc| {
5874 let name = vm.pop().to_str();
5875 // Direct port of `optison(char *name, char *s)` at Src/cond.c:502 — `[[ -o NAME ]]`
5876 // reads through the same `opts[]` array that `setopt NAME`
5877 // writes via `dosetopt`. Earlier code read a duplicate Executor
5878 // HashMap which never saw `bin_setopt`'s writes (those land in
5879 // `OPTS_LIVE` via `opt_state_set`). Routing through the canonical
5880 // C port restores the single-store invariant: one `opts[]`,
5881 // shared between setopt/unsetopt and `[[ -o ]]`.
5882 let r = crate::ported::cond::optison("test", &name); // c:cond.c:502
5883 match r {
5884 0 => fusevm::Value::Bool(true), // c:cond.c:520 set
5885 1 => fusevm::Value::Bool(false), // c:cond.c:518/520 unset
5886 _ => {
5887 // c:cond.c:514 — unknown option: zwarnnam emitted by
5888 // optison itself when POSIXBUILTINS is unset; mirror to
5889 // stderr here for parity with the earlier diagnostic.
5890 eprintln!("zshrs:1: no such option: {}", name);
5891 fusevm::Value::Bool(false)
5892 }
5893 }
5894 });
5895
5896 vm.register_builtin(BUILTIN_PARAM_FILTER, |vm, _argc| {
5897 let pattern_raw = vm.pop().to_str();
5898 let name = vm.pop().to_str();
5899 // Expand `$VAR` / `${VAR}` / `$(cmd)` / `$((expr))` references in
5900 // the pattern before matching. Direct port of Src/subst.c:3192
5901 // case '#' arm which calls singsub on the operand. zinit's
5902 // `${(@)region_highlight:#$_LAST_HIGHLIGHT}` and similar idioms
5903 // rely on the pattern being expanded first.
5904 let pattern = if pattern_raw.contains('$') || pattern_raw.contains('`') {
5905 crate::ported::subst::singsub(&pattern_raw)
5906 } else {
5907 pattern_raw
5908 };
5909 let arr_val = with_executor(|exec| exec.array(&name));
5910 // Inline of the deleted extendedglob_match helper (Src/glob.c
5911 // pattern_match path): leading `^` inverts when extendedglob is
5912 // set; otherwise falls through to glob_match_static. Plain
5913 // literal-equal path retained for the no-meta-char case
5914 // (cheaper than running a regex compile on every element).
5915 let matches_glob = |s: &str, pat: &str| -> bool {
5916 let starts_neg = pat.starts_with('^');
5917 if pat.contains('*') || pat.contains('?') || pat.contains('[') || starts_neg {
5918 let extendedglob = with_executor(|exec| {
5919 crate::ported::options::opt_state_get("extendedglob").unwrap_or(false)
5920 });
5921 if extendedglob {
5922 if let Some(neg) = pat.strip_prefix('^') {
5923 return !crate::exec::glob_match_static(s, neg);
5924 }
5925 }
5926 crate::exec::glob_match_static(s, pat)
5927 } else {
5928 s == pat
5929 }
5930 };
5931 // (M) flag inverts the filter: keep matching elements, drop
5932 // non-matching (vs default which drops matches). Direct port
5933 // of subst.c's SUB_MATCH bit which getmatch consults to
5934 // pick the "matched" disposition over the "rest" default.
5935 let invert = {
5936 let sf = crate::ported::subst::sub_flags_get(); // c:2171
5937 let inv = (sf & 0x0008) != 0; // c:2171 SUB_MATCH
5938 crate::ported::subst::sub_flags_set(0); // c:2169 (consume)
5939 inv
5940 };
5941 if let Some(arr) = arr_val {
5942 let kept: Vec<fusevm::Value> = arr
5943 .into_iter()
5944 .filter(|elem| { // c:2171
5945 let m = matches_glob(elem, &pattern); // c:2171
5946 if invert { m } else { !m } // c:2171
5947 })
5948 .map(fusevm::Value::str)
5949 .collect();
5950 return fusevm::Value::Array(kept);
5951 }
5952 let val = with_executor(|exec| exec.get_variable(&name));
5953 let m = matches_glob(&val, &pattern);
5954 if invert { // c:2171
5955 if m { fusevm::Value::str(val) } else { fusevm::Value::str(String::new()) } // c:2171
5956 } else if m {
5957 fusevm::Value::str(String::new())
5958 } else {
5959 fusevm::Value::str(val)
5960 }
5961 });
5962
5963 // `a[i]=(elements)` / `a[i,j]=(elements)` / `a[i]=()`
5964 // — subscripted-array assign with array RHS. Stack pushed by
5965 // compile_assign as: [elem0, elem1, …, elemN-1, name, key].
5966 vm.register_builtin(BUILTIN_SET_SUBSCRIPT_RANGE, |vm, argc| {
5967 let n = argc as usize;
5968 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(n);
5969 for _ in 0..n {
5970 popped.push(vm.pop());
5971 }
5972 popped.reverse();
5973 if popped.len() < 2 {
5974 return fusevm::Value::Status(1);
5975 }
5976 let key = popped.pop().unwrap().to_str();
5977 let name = popped.pop().unwrap().to_str();
5978 let mut values: Vec<String> = Vec::new();
5979 for v in popped {
5980 match v {
5981 fusevm::Value::Array(items) => {
5982 for it in items {
5983 values.push(it.to_str());
5984 }
5985 }
5986 other => values.push(other.to_str()),
5987 }
5988 }
5989 with_executor(|exec| {
5990 // Read paramtab-first, mutate, write back via canonical
5991 // set_array so subscript-slice/index assignments are
5992 // visible to both the paramtab single source and the
5993 // legacy cache.
5994 let mut arr = exec.array(&name).unwrap_or_default();
5995 // Slice form `a[i,j]=(values)` — replace the inclusive
5996 // slice. Negative bounds count from end. Out-of-range high
5997 // bound clamps to len; low bound below 1 clamps to 1.
5998 if let Some((s_str, e_str)) = key.split_once(',') {
5999 let len = arr.len() as i64;
6000 let resolve = |s: &str| -> i64 { s.trim().parse::<i64>().unwrap_or_default() };
6001 let s_raw = resolve(s_str);
6002 let e_raw = resolve(e_str);
6003 let lo = if s_raw < 0 {
6004 (len + s_raw + 1).max(1)
6005 } else {
6006 s_raw.max(1)
6007 };
6008 let hi = if e_raw < 0 {
6009 (len + e_raw + 1).max(0)
6010 } else {
6011 e_raw.max(0)
6012 };
6013 let lo_idx = (lo - 1) as usize;
6014 let hi_idx = ((hi as usize).min(arr.len())).max(lo_idx);
6015 let _: Vec<String> = arr.splice(lo_idx..hi_idx, values).collect();
6016 exec.set_array(name, arr);
6017 return;
6018 }
6019 // Single-int key. `a[i]=()` (empty values) removes the
6020 // element at that index. Otherwise treat as a multi-element
6021 // splice starting at i.
6022 let i: i64 = match key.trim().parse::<i64>() {
6023 Ok(n) => n,
6024 Err(_) => return,
6025 };
6026 let len = arr.len() as i64;
6027 let idx = if i > 0 {
6028 (i - 1) as usize
6029 } else if i < 0 {
6030 let off = len + i;
6031 if off < 0 {
6032 return;
6033 }
6034 off as usize
6035 } else {
6036 return;
6037 };
6038 if values.is_empty() {
6039 if idx < arr.len() {
6040 arr.remove(idx);
6041 }
6042 } else {
6043 let end = (idx + 1).min(arr.len());
6044 let _: Vec<String> = arr.splice(idx..end, values).collect();
6045 }
6046 exec.set_array(name, arr);
6047 });
6048 fusevm::Value::Status(0)
6049 });
6050
6051 // BUILTIN_CONCAT_SPLICE — word-segment concat with first/last
6052 // sticking (default zsh splice semantics for `${arr[@]}`, `$@`).
6053 vm.register_builtin(BUILTIN_CONCAT_SPLICE, |vm, _argc| {
6054 let rhs = vm.pop();
6055 let lhs = vm.pop();
6056 match (lhs, rhs) {
6057 (fusevm::Value::Array(mut la), fusevm::Value::Array(ra)) => {
6058 if la.is_empty() {
6059 return fusevm::Value::Array(ra);
6060 }
6061 if ra.is_empty() {
6062 return fusevm::Value::Array(la);
6063 }
6064 // Last of la merges with first of ra; rest unchanged.
6065 let last_l = la.pop().unwrap();
6066 let mut ra_iter = ra.into_iter();
6067 let first_r = ra_iter.next().unwrap();
6068 let l_s = last_l.as_str_cow();
6069 let r_s = first_r.as_str_cow();
6070 let mut merged = String::with_capacity(l_s.len() + r_s.len());
6071 merged.push_str(&l_s);
6072 merged.push_str(&r_s);
6073 la.push(fusevm::Value::str(merged));
6074 la.extend(ra_iter);
6075 fusevm::Value::Array(la)
6076 }
6077 (fusevm::Value::Array(mut la), rhs_scalar) => {
6078 if la.is_empty() {
6079 return fusevm::Value::str(rhs_scalar.as_str_cow().to_string());
6080 }
6081 let last = la.pop().unwrap();
6082 let l_s = last.as_str_cow();
6083 let r_s = rhs_scalar.as_str_cow();
6084 let mut s = String::with_capacity(l_s.len() + r_s.len());
6085 s.push_str(&l_s);
6086 s.push_str(&r_s);
6087 la.push(fusevm::Value::str(s));
6088 fusevm::Value::Array(la)
6089 }
6090 (lhs_scalar, fusevm::Value::Array(mut ra)) => {
6091 if ra.is_empty() {
6092 return fusevm::Value::str(lhs_scalar.as_str_cow().to_string());
6093 }
6094 let first = ra.remove(0);
6095 let l_s = lhs_scalar.as_str_cow();
6096 let r_s = first.as_str_cow();
6097 let mut s = String::with_capacity(l_s.len() + r_s.len());
6098 s.push_str(&l_s);
6099 s.push_str(&r_s);
6100 let mut out = Vec::with_capacity(ra.len() + 1);
6101 out.push(fusevm::Value::str(s));
6102 out.extend(ra);
6103 fusevm::Value::Array(out)
6104 }
6105 (lhs_s, rhs_s) => {
6106 let l = lhs_s.as_str_cow();
6107 let r = rhs_s.as_str_cow();
6108 let mut s = String::with_capacity(l.len() + r.len());
6109 s.push_str(&l);
6110 s.push_str(&r);
6111 fusevm::Value::str(s)
6112 }
6113 }
6114 });
6115
6116 // BUILTIN_CONCAT_DISTRIBUTE — word-segment concat. With
6117 // rcexpandparam (zsh option), distributes element-wise (cartesian
6118 // product). Default mode: joins arrays with IFS first char to a
6119 // single scalar before concat, matching zsh's default unquoted
6120 // and DQ semantics. Direct port of Src/subst.c sepjoin path
6121 // (line ~1813) which gates element-vs-join on the rc_expand_param
6122 // option, defaulting to join.
6123 // BUILTIN_CONCAT_DISTRIBUTE_FORCED — same shape as
6124 // CONCAT_DISTRIBUTE, but always cartesian-distributes when one
6125 // side is Array. Used for compile-time-detected explicit
6126 // distribution forms (`${^arr}` etc.) where the source flag
6127 // overrides the rcexpandparam option default.
6128 vm.register_builtin(BUILTIN_CONCAT_DISTRIBUTE_FORCED, |vm, _argc| {
6129 let rhs = vm.pop();
6130 let lhs = vm.pop();
6131 match (lhs, rhs) {
6132 (fusevm::Value::Array(la), fusevm::Value::Array(ra)) => {
6133 if ra.is_empty() {
6134 return fusevm::Value::Array(la);
6135 }
6136 if la.is_empty() {
6137 return fusevm::Value::Array(ra);
6138 }
6139 let mut out = Vec::with_capacity(la.len() * ra.len());
6140 for a in &la {
6141 let a_s = a.as_str_cow();
6142 for b in &ra {
6143 let b_s = b.as_str_cow();
6144 let mut s = String::with_capacity(a_s.len() + b_s.len());
6145 s.push_str(&a_s);
6146 s.push_str(&b_s);
6147 out.push(fusevm::Value::str(s));
6148 }
6149 }
6150 fusevm::Value::Array(out)
6151 }
6152 (fusevm::Value::Array(la), rhs_scalar) => {
6153 let r = rhs_scalar.as_str_cow();
6154 let out: Vec<fusevm::Value> = la
6155 .into_iter()
6156 .map(|a| {
6157 let a_s = a.as_str_cow();
6158 let mut s = String::with_capacity(a_s.len() + r.len());
6159 s.push_str(&a_s);
6160 s.push_str(&r);
6161 fusevm::Value::str(s)
6162 })
6163 .collect();
6164 fusevm::Value::Array(out)
6165 }
6166 (lhs_scalar, fusevm::Value::Array(ra)) => {
6167 let l = lhs_scalar.as_str_cow();
6168 let out: Vec<fusevm::Value> = ra
6169 .into_iter()
6170 .map(|b| {
6171 let b_s = b.as_str_cow();
6172 let mut s = String::with_capacity(l.len() + b_s.len());
6173 s.push_str(&l);
6174 s.push_str(&b_s);
6175 fusevm::Value::str(s)
6176 })
6177 .collect();
6178 fusevm::Value::Array(out)
6179 }
6180 (lhs_s, rhs_s) => {
6181 let l = lhs_s.as_str_cow();
6182 let r = rhs_s.as_str_cow();
6183 let mut s = String::with_capacity(l.len() + r.len());
6184 s.push_str(&l);
6185 s.push_str(&r);
6186 fusevm::Value::str(s)
6187 }
6188 }
6189 });
6190
6191 vm.register_builtin(BUILTIN_CONCAT_DISTRIBUTE, |vm, _argc| {
6192 let rhs = vm.pop();
6193 let lhs = vm.pop();
6194 let rc_expand = with_executor(|exec| {
6195 crate::ported::options::opt_state_get("rcexpandparam").unwrap_or(false)
6196 });
6197 let ifs_first = || -> String {
6198 with_executor(|exec| {
6199 exec.get_variable("IFS")
6200 .chars()
6201 .next()
6202 .map(|c| c.to_string())
6203 .unwrap_or_else(|| " ".to_string())
6204 })
6205 };
6206 // Helper: join an Array to scalar via IFS-first.
6207 let join_arr = |arr: Vec<fusevm::Value>| -> String {
6208 let sep = ifs_first();
6209 arr.iter()
6210 .map(|v| v.as_str_cow().into_owned())
6211 .collect::<Vec<_>>()
6212 .join(&sep)
6213 };
6214 if !rc_expand {
6215 // Default: join any Array side to scalar, then concat.
6216 let l = match lhs {
6217 fusevm::Value::Array(a) => join_arr(a),
6218 other => other.as_str_cow().into_owned(),
6219 };
6220 let r = match rhs {
6221 fusevm::Value::Array(a) => join_arr(a),
6222 other => other.as_str_cow().into_owned(),
6223 };
6224 let mut s = String::with_capacity(l.len() + r.len());
6225 s.push_str(&l);
6226 s.push_str(&r);
6227 return fusevm::Value::str(s);
6228 }
6229 match (lhs, rhs) {
6230 (fusevm::Value::Array(la), fusevm::Value::Array(ra)) => {
6231 // Cartesian product: [a + b for a in la for b in ra].
6232 let mut out = Vec::with_capacity(la.len() * ra.len().max(1));
6233 if ra.is_empty() {
6234 return fusevm::Value::Array(la);
6235 }
6236 if la.is_empty() {
6237 return fusevm::Value::Array(ra);
6238 }
6239 for a in &la {
6240 let a_s = a.as_str_cow();
6241 for b in &ra {
6242 let b_s = b.as_str_cow();
6243 let mut s = String::with_capacity(a_s.len() + b_s.len());
6244 s.push_str(&a_s);
6245 s.push_str(&b_s);
6246 out.push(fusevm::Value::str(s));
6247 }
6248 }
6249 fusevm::Value::Array(out)
6250 }
6251 (fusevm::Value::Array(la), rhs_scalar) => {
6252 let r = rhs_scalar.as_str_cow();
6253 let out: Vec<fusevm::Value> = la
6254 .into_iter()
6255 .map(|a| {
6256 let a_s = a.as_str_cow();
6257 let mut s = String::with_capacity(a_s.len() + r.len());
6258 s.push_str(&a_s);
6259 s.push_str(&r);
6260 fusevm::Value::str(s)
6261 })
6262 .collect();
6263 fusevm::Value::Array(out)
6264 }
6265 (lhs_scalar, fusevm::Value::Array(ra)) => {
6266 let l = lhs_scalar.as_str_cow();
6267 let out: Vec<fusevm::Value> = ra
6268 .into_iter()
6269 .map(|b| {
6270 let b_s = b.as_str_cow();
6271 let mut s = String::with_capacity(l.len() + b_s.len());
6272 s.push_str(&l);
6273 s.push_str(&b_s);
6274 fusevm::Value::str(s)
6275 })
6276 .collect();
6277 fusevm::Value::Array(out)
6278 }
6279 (lhs_s, rhs_s) => {
6280 // Fast path: both scalar → identical to Op::Concat.
6281 let l = lhs_s.as_str_cow();
6282 let r = rhs_s.as_str_cow();
6283 let mut s = String::with_capacity(l.len() + r.len());
6284 s.push_str(&l);
6285 s.push_str(&r);
6286 fusevm::Value::str(s)
6287 }
6288 }
6289 });
6290
6291 // `[[ a -ef b ]]` — same-inode test. Resolves both paths via fs::metadata
6292 // (follows symlinks the way zsh's -ef does) and compares (dev, inode).
6293 // Returns false on any I/O error (path missing, permission denied, etc.).
6294 vm.register_builtin(BUILTIN_SAME_FILE, |vm, _argc| {
6295 let b = vm.pop().to_str();
6296 let a = vm.pop().to_str();
6297 let same = match (std::fs::metadata(&a), std::fs::metadata(&b)) {
6298 (Ok(ma), Ok(mb)) => ma.dev() == mb.dev() && ma.ino() == mb.ino(),
6299 _ => false,
6300 };
6301 fusevm::Value::Bool(same)
6302 });
6303
6304 // `[[ -c path ]]` — character device.
6305 vm.register_builtin(BUILTIN_IS_CHARDEV, |vm, _argc| {
6306 let path = vm.pop().to_str();
6307 let result = std::fs::metadata(&path)
6308 .map(|m| m.file_type().is_char_device())
6309 .unwrap_or(false);
6310 fusevm::Value::Bool(result)
6311 });
6312 // `[[ -b path ]]` — block device.
6313 vm.register_builtin(BUILTIN_IS_BLOCKDEV, |vm, _argc| {
6314 let path = vm.pop().to_str();
6315 let result = std::fs::metadata(&path)
6316 .map(|m| m.file_type().is_block_device())
6317 .unwrap_or(false);
6318 fusevm::Value::Bool(result)
6319 });
6320 // `[[ -p path ]]` — FIFO (named pipe).
6321 vm.register_builtin(BUILTIN_IS_FIFO, |vm, _argc| {
6322 let path = vm.pop().to_str();
6323 let result = std::fs::metadata(&path)
6324 .map(|m| m.file_type().is_fifo())
6325 .unwrap_or(false);
6326 fusevm::Value::Bool(result)
6327 });
6328 // `[[ -S path ]]` — socket.
6329 vm.register_builtin(BUILTIN_IS_SOCKET, |vm, _argc| {
6330 let path = vm.pop().to_str();
6331 let result = std::fs::symlink_metadata(&path)
6332 .map(|m| m.file_type().is_socket())
6333 .unwrap_or(false);
6334 fusevm::Value::Bool(result)
6335 });
6336
6337 // `[[ -k path ]]` / `-u` / `-g` — sticky / setuid / setgid bit.
6338 vm.register_builtin(BUILTIN_HAS_STICKY, |vm, _argc| {
6339 let path = vm.pop().to_str();
6340 let result = std::fs::metadata(&path)
6341 .map(|m| m.permissions().mode() & libc::S_ISVTX as u32 != 0)
6342 .unwrap_or(false);
6343 fusevm::Value::Bool(result)
6344 });
6345 vm.register_builtin(BUILTIN_HAS_SETUID, |vm, _argc| {
6346 let path = vm.pop().to_str();
6347 let result = std::fs::metadata(&path)
6348 .map(|m| m.permissions().mode() & libc::S_ISUID as u32 != 0)
6349 .unwrap_or(false);
6350 fusevm::Value::Bool(result)
6351 });
6352 vm.register_builtin(BUILTIN_HAS_SETGID, |vm, _argc| {
6353 let path = vm.pop().to_str();
6354 let result = std::fs::metadata(&path)
6355 .map(|m| m.permissions().mode() & libc::S_ISGID as u32 != 0)
6356 .unwrap_or(false);
6357 fusevm::Value::Bool(result)
6358 });
6359 vm.register_builtin(BUILTIN_OWNED_BY_USER, |vm, _argc| {
6360 let path = vm.pop().to_str();
6361 let euid = unsafe { libc::geteuid() };
6362 let result = std::fs::metadata(&path)
6363 .map(|m| m.uid() == euid)
6364 .unwrap_or(false);
6365 fusevm::Value::Bool(result)
6366 });
6367 vm.register_builtin(BUILTIN_OWNED_BY_GROUP, |vm, _argc| {
6368 let path = vm.pop().to_str();
6369 let egid = unsafe { libc::getegid() };
6370 let result = std::fs::metadata(&path)
6371 .map(|m| m.gid() == egid)
6372 .unwrap_or(false);
6373 fusevm::Value::Bool(result)
6374 });
6375
6376 // `[[ -N path ]]` — file's access time is NOT newer than its
6377 // modification time (zsh man: "true if file exists and its
6378 // access time is not newer than its modification time"). Used
6379 // by zsh's mailbox-watching code. The semantic is `atime <=
6380 // mtime` (equivalent to `mtime >= atime`) — equal counts as
6381 // true, which a strict `mtime > atime` check missed for newly
6382 // created files where both stamps are identical.
6383 vm.register_builtin(BUILTIN_FILE_MODIFIED_SINCE_ACCESS, |vm, _argc| {
6384 let path = vm.pop().to_str();
6385 let result = std::fs::metadata(&path)
6386 .map(|m| m.atime() <= m.mtime())
6387 .unwrap_or(false);
6388 fusevm::Value::Bool(result)
6389 });
6390
6391 // `[[ a -nt b ]]` — true if `a`'s mtime is strictly later than `b`'s.
6392 // BOTH files must exist; if either is missing the result is false.
6393 // (Earlier behavior was bash's "missing == infinitely-old"; zsh
6394 // strictly requires both files to exist.)
6395 vm.register_builtin(BUILTIN_FILE_NEWER, |vm, _argc| {
6396 let b = vm.pop().to_str();
6397 let a = vm.pop().to_str();
6398 // Use SystemTime modified() for nanosecond precision —
6399 // MetadataExt::mtime() returns seconds only, so two files
6400 // touched within the same second compared equal even when
6401 // 500ms apart. zsh tracks ns and uses `>=` for ties (touching
6402 // a then b in quick succession should still report b newer).
6403 let ta = std::fs::metadata(&a).and_then(|m| m.modified()).ok();
6404 let tb = std::fs::metadata(&b).and_then(|m| m.modified()).ok();
6405 let result = match (ta, tb) {
6406 (Some(ta), Some(tb)) => ta > tb,
6407 _ => false,
6408 };
6409 fusevm::Value::Bool(result)
6410 });
6411
6412 // `[[ a -ot b ]]` — mirror of -nt. Same both-must-exist contract.
6413 vm.register_builtin(BUILTIN_FILE_OLDER, |vm, _argc| {
6414 let b = vm.pop().to_str();
6415 let a = vm.pop().to_str();
6416 let ta = std::fs::metadata(&a).and_then(|m| m.modified()).ok();
6417 let tb = std::fs::metadata(&b).and_then(|m| m.modified()).ok();
6418 let result = match (ta, tb) {
6419 (Some(ta), Some(tb)) => ta < tb,
6420 _ => false,
6421 };
6422 fusevm::Value::Bool(result)
6423 });
6424
6425 // `set -e` / `setopt errexit` post-command check. Compiler emits
6426 // this after each top-level command's SetStatus (skipped inside
6427 // conditionals/pipelines/&&||/`!`). If errexit is on AND the last
6428 // command exited non-zero AND it's not a `return` from a function,
6429 // exit the shell with that status.
6430 // `set -x` / `setopt xtrace` — print each command before it runs.
6431 // The compiler emits this BEFORE the actual builtin/external call
6432 // with the command's literal text as a single string arg. We
6433 // print to stderr if xtrace is on. Honors `$PS4` (default `+ `).
6434 //
6435 // ── XTRACE flow control ────────────────────────────────────────
6436 // Mirror of C zsh's `doneps4` flag in execcmd_exec (Src/exec.c).
6437 // When an assignment trace fires (XTRACE_ASSIGN), it emits PS4
6438 // and sets this flag so the subsequent XTRACE_ARGS skips its own
6439 // PS4 emission — the assignment + command end up on the SAME
6440 // line: `<PS4>a=1 echo hello\n`. XTRACE_ARGS / XTRACE_NEWLINE
6441 // reset the flag after emitting the trailing `\n`.
6442 vm.register_builtin(BUILTIN_XTRACE_LINE, |vm, _argc| {
6443 let cmd_text = vm.pop().to_str();
6444 // Sync exec.last_status with the live vm.last_status BEFORE
6445 // the next command runs. Direct port of the zsh exec.c
6446 // contract — `$?` reads the exit status of the *most recent*
6447 // command. XTRACE_LINE is emitted by the compiler BEFORE
6448 // every simple command, so it's the natural sync point.
6449 let live = vm.last_status;
6450 with_executor(|exec| {
6451 exec.set_last_status(live);
6452 });
6453 // C zsh emits xtrace for `(( … ))` / `[[ … ]]` / `case` /
6454 // `if/while/until/for/repeat` head expressions via
6455 // `printprompt4(); fprintf(xtrerr, "%s\n", expr)` at
6456 // Src/exec.c:5240 (math), c:5286 (cond), c:4117 (for), etc.
6457 // The compiler emits BUILTIN_XTRACE_LINE only at those
6458 // construct boundaries (compile_arith / compile_cond /
6459 // compile_if / compile_while / compile_for / compile_case);
6460 // simple commands route to BUILTIN_XTRACE_ARGS instead. So
6461 // this handler always emits when xtrace is on — no prefix-
6462 // string heuristic.
6463 let on = with_executor(|exec|
6464 crate::ported::options::opt_state_get("xtrace").unwrap_or(false));
6465 if on {
6466 let already = XTRACE_DONE_PS4.with(|f| f.get());
6467 if !already {
6468 printprompt4();
6469 }
6470 eprintln!("{}", cmd_text);
6471 XTRACE_DONE_PS4.with(|f| f.set(false));
6472 }
6473 fusevm::Value::Status(0)
6474 });
6475
6476 // Like XTRACE_LINE but reads the top `argc - 1` values from the
6477 // VM stack WITHOUT consuming them (peek), then pops a prefix
6478 // string at the top. Joins prefix + peeked args with spaces using
6479 // zsh's quotedzputs-equivalent quoting. Direct port of
6480 // Src/exec.c:2055-2066 — emit AFTER expansion, with each arg
6481 // shell-quoted, so `for i in a b; echo for $i` traces as
6482 // `echo for a` / `echo for b`, not `echo for $i`.
6483 //
6484 // Stack contract on entry: [arg1, arg2, ..., argN, prefix].
6485 // Pops prefix; peeks argN..arg1 below. argc = N + 1.
6486 vm.register_builtin(BUILTIN_XTRACE_ARGS, |vm, argc| {
6487 let prefix = vm.pop().to_str();
6488 let live = vm.last_status;
6489 with_executor(|exec| {
6490 exec.set_last_status(live);
6491 });
6492 let on = with_executor(|exec| crate::ported::options::opt_state_get("xtrace").unwrap_or(false));
6493 if on {
6494 let n_args = argc.saturating_sub(1) as usize;
6495 let len = vm.stack.len();
6496 let arg_strs: Vec<String> = if n_args > 0 && len >= n_args {
6497 vm.stack[len - n_args..]
6498 .iter()
6499 .map(|v| quotedzputs(&v.to_str()))
6500 .collect()
6501 } else {
6502 Vec::new()
6503 };
6504 // Builtins dispatch through `execbuiltin` (Src/builtin.c:442)
6505 // which emits its own PS4 + name + args xtrace. To avoid
6506 // double-emission, skip our emission here when the first
6507 // arg is a known builtin with a registered HandlerFunc —
6508 // those go through execbuiltin and will trace themselves.
6509 //
6510 // Externals + builtins without HandlerFunc (still pending
6511 // canonical port) keep our emission as a stand-in until
6512 // they migrate over.
6513 // The `prefix` IS the command name (first whitespace-token
6514 // of the original cmd text). If a BUILTIN entry with a
6515 // HandlerFunc matches, execbuiltin will emit xtrace there.
6516 let goes_through_execbuiltin = crate::ported::builtin::BUILTINS
6517 .iter()
6518 .any(|b| b.node.nam == prefix && b.handlerfunc.is_some());
6519 if !goes_through_execbuiltin {
6520 let line = if arg_strs.is_empty() {
6521 prefix
6522 } else {
6523 format!("{} {}", prefix, arg_strs.join(" "))
6524 };
6525 // Mirrors Src/exec.c:2055 xtrace emission. C does:
6526 // if (!doneps4) printprompt4();
6527 // ... emit args + spaces ...
6528 // fputc('\n', xtrerr); fflush(xtrerr);
6529 // We honor doneps4 via XTRACE_DONE_PS4 — if a prior
6530 // XTRACE_ASSIGN this line already emitted PS4, skip
6531 // it. Then reset the flag after the trailing newline
6532 // so the next command starts fresh.
6533 let already_ps4 = XTRACE_DONE_PS4.with(|f| f.get());
6534 if !already_ps4 {
6535 printprompt4();
6536 }
6537 eprintln!("{}", line);
6538 }
6539 XTRACE_DONE_PS4.with(|f| f.set(false));
6540 }
6541 fusevm::Value::Status(0)
6542 });
6543
6544 // BUILTIN_XTRACE_ASSIGN — direct port of the per-assignment
6545 // trace block at Src/exec.c:2517-2582. C body excerpt:
6546 // xtr = isset(XTRACE);
6547 // if (xtr) { printprompt4(); doneps4 = 1; }
6548 // while (assign) {
6549 // if (xtr) fprintf(xtrerr, "%s+=" or "%s=", name);
6550 // ... eval value into `val` ...
6551 // if (xtr) { quotedzputs(val, xtrerr); fputc(' ', xtrerr); }
6552 // ...
6553 // }
6554 //
6555 // Stack on entry: [..., name, value]. PEEKS both (they're left
6556 // on stack for SET_VAR to pop). Emits `name=<quoted-val> ` with
6557 // no newline; trailing `\n` comes from XTRACE_ARGS (cmd path)
6558 // or XTRACE_NEWLINE (assignment-only path).
6559 vm.register_builtin(BUILTIN_XTRACE_ASSIGN, |vm, _argc| {
6560 let on = with_executor(|exec| crate::ported::options::opt_state_get("xtrace").unwrap_or(false));
6561 if on {
6562 // PEEK [..., name, value] — argc==2 by contract.
6563 let len = vm.stack.len();
6564 if len >= 2 {
6565 let name = vm.stack[len - 2].to_str();
6566 let value = vm.stack[len - 1].to_str();
6567 let already_ps4 = XTRACE_DONE_PS4.with(|f| f.get());
6568 if !already_ps4 {
6569 printprompt4();
6570 XTRACE_DONE_PS4.with(|f| f.set(true));
6571 }
6572 // C: `fprintf(xtrerr, "%s=", name)` then `quotedzputs
6573 // (val); fputc(' ', xtrerr);`. Emit no newline.
6574 eprint!("{}={} ", name, quotedzputs(&value));
6575 }
6576 }
6577 fusevm::Value::Status(0)
6578 });
6579
6580 // BUILTIN_XTRACE_NEWLINE — emit trailing `\n` + flush iff a
6581 // prior XTRACE_ASSIGN this line already emitted PS4. Mirrors
6582 // C's `fputc('\n', xtrerr); fflush(xtrerr);` at exec.c:3398
6583 // (the assignment-only path through execcmd_exec).
6584 vm.register_builtin(BUILTIN_XTRACE_NEWLINE, |_vm, _argc| {
6585 let on = with_executor(|exec| crate::ported::options::opt_state_get("xtrace").unwrap_or(false));
6586 if on {
6587 let already_ps4 = XTRACE_DONE_PS4.with(|f| f.get());
6588 if already_ps4 {
6589 eprintln!();
6590 XTRACE_DONE_PS4.with(|f| f.set(false));
6591 }
6592 }
6593 fusevm::Value::Status(0)
6594 });
6595
6596 vm.register_builtin(BUILTIN_ERREXIT_CHECK, |vm, _argc| {
6597 let last = vm.last_status;
6598 if last == 0 {
6599 return fusevm::Value::Status(0);
6600 }
6601 // ZERR / ERR trap fires whenever a command exits non-zero
6602 // (zsh signals.c handle_signals path). Read the trap body
6603 // BEFORE the errexit check so a trap on the failing
6604 // command's last command can run before we exit.
6605 let zerr_body = with_executor(|exec| {
6606 exec.traps
6607 .get("ZERR")
6608 .cloned()
6609 .or_else(|| exec.traps.get("ERR").cloned())
6610 });
6611 if let Some(body) = zerr_body {
6612 // Run the trap. Don't recurse on the trap's own failure
6613 // (clear last_status during the run).
6614 with_executor(|exec| {
6615 let saved = exec.last_status();
6616 exec.set_last_status(0);
6617 let _ = exec.execute_script(&body);
6618 exec.set_last_status(saved);
6619 });
6620 }
6621 let should_exit = with_executor(|exec| {
6622 // zsh stores the option as `errexit` (default OFF). Honor
6623 // both keys (`errexit=true` from `setopt errexit` /
6624 // `set -o errexit`, and `set -e` which currently writes
6625 // `errexit=true` too). Also suppress when inside a function
6626 // call — zsh's errexit lets functions handle their own
6627 // failures unless ERR_RETURN is also set. Also suppress
6628 // when inside a subshell — the in-process snapshot/restore
6629 // doesn't have a process-isolation boundary, so a real
6630 // `process::exit` would tear down the parent shell. Match
6631 // zsh's "errexit aborts the subshell only" by leaving the
6632 // parent alive (subshell continues until natural end).
6633 // errexit lives in two stores. `set -e` / `setopt errexit`
6634 // write through bin_setopt → OPTS_LIVE (canonical
6635 // `opts[ERREXIT]` per Src/options.c:46). Older paths still
6636 // populate `exec.options`. Check both — agree when EITHER
6637 // says on.
6638 let on_canonical = crate::ported::zsh_h::isset(
6639 crate::ported::zsh_h::ERREXIT);
6640 let on_legacy = crate::ported::options::opt_state_get("errexit").unwrap_or(false);
6641 (on_canonical || on_legacy)
6642 && exec.local_scope_depth == 0
6643 && exec.subshell_snapshots.is_empty()
6644 });
6645 if should_exit {
6646 std::process::exit(last);
6647 }
6648 fusevm::Value::Status(last)
6649 });
6650
6651 // `${var:-default}` / `${var:=default}` / `${var:?error}` / `${var:+alt}`
6652 // Pops [name, op_byte, rhs] (rhs popped first). Returns the modified
6653 // value as Value::Str. Handles unset/empty distinction (`:-` etc.
6654 // treat empty same as unset, matching POSIX).
6655 vm.register_builtin(BUILTIN_PARAM_DEFAULT_FAMILY, |vm, _argc| {
6656 let rhs = vm.pop().to_str();
6657 let op = vm.pop().to_int() as u8;
6658 let name = vm.pop().to_str();
6659 // Op codes:
6660 // 0 :- 1 := 2 :? 3 :+ (treat-empty-as-unset variants)
6661 // 4 - 5 = 6 ? 7 + (no-colon: only fire if truly unset)
6662 // The default/alt modifiers handle missing-var themselves, so
6663 // suppress the nounset (set -u) abort during the value lookup —
6664 // otherwise `${unset:-fb}` exits the shell instead of returning
6665 // "fb". Save/restore nounset around the lookup.
6666 let val = with_executor(|exec| {
6667 let saved_nounset = crate::ported::options::opt_state_get("nounset");
6668 let saved_unset = crate::ported::options::opt_state_get("unset");
6669 crate::ported::options::opt_state_set("nounset", false);
6670 crate::ported::options::opt_state_set("unset", true);
6671 let v = exec.get_variable(&name);
6672 match saved_nounset {
6673 Some(b) => {
6674 crate::ported::options::opt_state_set("nounset", b);
6675 }
6676 None => {
6677 crate::ported::options::opt_state_unset("nounset");
6678 }
6679 }
6680 match saved_unset {
6681 Some(b) => {
6682 crate::ported::options::opt_state_set("unset", b);
6683 }
6684 None => {
6685 crate::ported::options::opt_state_unset("unset");
6686 }
6687 }
6688 v
6689 });
6690 let is_set = with_executor(|exec| {
6691 // Positional params ($1, $2, ...): set iff index <= $#.
6692 if name.chars().all(|c| c.is_ascii_digit()) && !name.is_empty() {
6693 if let Ok(idx) = name.parse::<usize>() {
6694 if idx == 0 {
6695 return true; // $0 always set
6696 }
6697 return idx <= exec.pparams().len();
6698 }
6699 }
6700 // zsh-special "always set" params: their getter computes
6701 // a dynamic value, but the contains_key check fails. Treat
6702 // them as set so `${SECONDS-default}` returns the seconds,
6703 // not "default".
6704 let is_zsh_special = matches!(
6705 name.as_str(),
6706 "SECONDS"
6707 | "EPOCHSECONDS"
6708 | "EPOCHREALTIME"
6709 | "RANDOM"
6710 | "LINENO"
6711 | "HISTCMD"
6712 | "PPID"
6713 | "UID"
6714 | "EUID"
6715 | "GID"
6716 | "EGID"
6717 | "SHLVL"
6718 );
6719 exec.has_scalar(&name)
6720 || exec.array(&name).is_some()
6721 || exec.assoc(&name).is_some()
6722 || std::env::var(&name).is_ok()
6723 || is_zsh_special
6724 });
6725 let is_empty = val.is_empty();
6726 // For colon variants, "missing" = unset OR empty.
6727 // For no-colon variants, "missing" = unset only.
6728 let missing = match op {
6729 0..=3 => is_empty,
6730 _ => !is_set,
6731 };
6732 // Empty-unquoted-elide for default-family results. When the
6733 // resulting expansion is empty AND we're unquoted, drop the
6734 // arg. Direct port of zsh's elide-empty-words pass which
6735 // applies to ALL paramsubst results, including default-family.
6736 let in_dq = with_executor(|exec| exec.in_dq_context > 0);
6737 let maybe_elide = |s: String| -> fusevm::Value {
6738 if s.is_empty() && !in_dq {
6739 fusevm::Value::Array(Vec::new())
6740 } else {
6741 fusevm::Value::str(s)
6742 }
6743 };
6744 // The default/alt operand may contain `$var` / `$(cmd)` /
6745 // `$((expr))` — zsh expands these before substitution. Apply
6746 // expand_string lazily (only when we'll actually use rhs).
6747 let expand_rhs = |s: &str| -> String { crate::ported::subst::singsub(s) };
6748 match op {
6749 0 | 4 => {
6750 // `:-` / `-` use default if missing
6751 if missing {
6752 maybe_elide(expand_rhs(&rhs))
6753 } else {
6754 maybe_elide(val)
6755 }
6756 }
6757 1 | 5 => {
6758 // `:=` / `=` assign default if missing, then use it
6759 if missing {
6760 let expanded = expand_rhs(&rhs);
6761 with_executor(|exec| {
6762 exec.set_scalar(name, expanded.clone());
6763 });
6764 maybe_elide(expanded)
6765 } else {
6766 maybe_elide(val)
6767 }
6768 }
6769 2 | 6 => {
6770 // `:?` / `?` error if missing — zsh in -c mode prints
6771 // `zsh:LINE: NAME: msg` and exits 1. Mirror that: emit
6772 // diagnostic on stderr and abort the shell.
6773 if missing {
6774 let expanded = expand_rhs(&rhs);
6775 let msg = if expanded.is_empty() {
6776 "parameter not set".to_string()
6777 } else {
6778 expanded
6779 };
6780 eprintln!("zshrs:1: {}: {}", name, msg);
6781 std::process::exit(1);
6782 } else {
6783 fusevm::Value::str(val)
6784 }
6785 }
6786 3 | 7 => {
6787 // `:+` / `+` use alt if NOT missing (set-and-non-empty
6788 // for colon variant; just set for no-colon variant).
6789 if missing {
6790 maybe_elide(String::new())
6791 } else {
6792 maybe_elide(expand_rhs(&rhs))
6793 }
6794 }
6795 8 => {
6796 // `${+name}` set-test — emits "1" if name is set,
6797 // "0" if unset. Direct port of subst.c case '+' at
6798 // the leading-flag position (different from `${name+rhs}`).
6799 // is_set was computed above and includes positional
6800 // params, zsh-special vars, regular vars, arrays,
6801 // assocs. Subscripted form `${+arr[i]}` checks if
6802 // that specific element is set — get_variable doesn't
6803 // parse subscripts, so resolve the lookup by hand:
6804 // numeric N → arr[N-1] is set iff N <= len; (r)PAT /
6805 // (R)PAT / KEY → resolve via the same subscript
6806 // engine as plain `${arr[i]}`.
6807 if let Some(lb) = name.find('[') {
6808 if name.ends_with(']') {
6809 let arr_name = &name[..lb];
6810 let key = &name[lb + 1..name.len() - 1];
6811 let direct_set = with_executor(|exec| {
6812 // Numeric index: 1-based, must be in range.
6813 if let Ok(n) = key.parse::<i64>() {
6814 let len = exec
6815 .array(arr_name)
6816 .map(|a| a.len() as i64)
6817 .unwrap_or(0);
6818 if n > 0 && n <= len {
6819 return Some(true);
6820 }
6821 if n < 0 {
6822 let resolved = len + n;
6823 return Some(resolved >= 0);
6824 }
6825 return Some(false);
6826 }
6827 if let Some(map) = exec.assoc(arr_name) {
6828 return Some(map.contains_key(key));
6829 }
6830 if let Some(arr) = exec.array(arr_name) {
6831 let pat = if let Some(p) = key
6832 .strip_prefix("(r)")
6833 .or_else(|| key.strip_prefix("(R)"))
6834 {
6835 p
6836 } else {
6837 key
6838 };
6839 return Some(arr.iter().any(|el| {
6840 crate::exec::glob_match_static(el, pat)
6841 }));
6842 }
6843 None
6844 });
6845 // Magic-assoc fallback (commands, aliases,
6846 // functions, options, etc.) — `${+commands[ls]}`
6847 // walks PATH to answer "is ls a command". Direct
6848 // port of zsh's getindex routing through the
6849 // special-parameter getfn (Src/params.c
6850 // SPECIAL_PARAMS) when the named assoc isn't
6851 // user-declared. Re-uses the same magic_assoc_lookup
6852 // dispatcher BUILTIN_ARRAY_INDEX consults; called
6853 // outside the with_executor closure so the lookup
6854 // itself can re-enter the executor lock safely.
6855 let element_set = direct_set.unwrap_or_else(|| {
6856 magic_assoc_lookup(arr_name, key)
6857 .map(|v| !v.to_str().is_empty())
6858 .unwrap_or(false)
6859 });
6860 return fusevm::Value::str(if element_set { "1" } else { "0" });
6861 }
6862 fusevm::Value::str(if !val.is_empty() { "1" } else { "0" })
6863 } else {
6864 fusevm::Value::str(if is_set { "1" } else { "0" })
6865 }
6866 }
6867 _ => fusevm::Value::str(val),
6868 }
6869 });
6870
6871 // `${var:offset[:length]}` — substring. Pops [name, offset, length].
6872 // length == -1 means "rest of string". Negative offset counts from end.
6873 vm.register_builtin(BUILTIN_PARAM_SUBSTRING, |vm, _argc| {
6874 let length = vm.pop().to_int();
6875 let offset = vm.pop().to_int();
6876 let name = vm.pop().to_str();
6877 // `${@:offset:length}` / `${*:offset:length}` — slice
6878 // positional parameters as ARRAY elements (not chars). zsh's
6879 // semantics: 1-based, inclusive offset; length counts elems.
6880 // For arrays/assoc-values arrays, same array semantics.
6881 // `[@]`/`[*]` suffix preserved by the compile path indicates
6882 // the user wrote `${arr[@]:n}` and expects splice; return
6883 // Value::Array so downstream array-init keeps element
6884 // boundaries.
6885 let (lookup_name, force_array) = if let Some(stripped) = name
6886 .strip_suffix("[@]")
6887 .or_else(|| name.strip_suffix("[*]"))
6888 {
6889 (stripped.to_string(), true)
6890 } else {
6891 (name.clone(), false)
6892 };
6893 if lookup_name == "@" || lookup_name == "*" {
6894 let result = with_executor(|exec| slice_positionals(exec, offset, length));
6895 return fusevm::Value::Array(result.into_iter().map(fusevm::Value::str).collect());
6896 }
6897 let array_slice = with_executor(|exec| exec.array(&lookup_name));
6898 if let Some(arr) = array_slice {
6899 let result = slice_array_zero_based(&arr, offset, length);
6900 return if force_array {
6901 fusevm::Value::Array(result.into_iter().map(fusevm::Value::str).collect())
6902 } else {
6903 fusevm::Value::str(result.join(" "))
6904 };
6905 }
6906 let name = lookup_name;
6907 let val = with_executor(|exec| exec.get_variable(&name));
6908 let chars: Vec<char> = val.chars().collect();
6909 let len = chars.len() as i64;
6910 let start = if offset < 0 {
6911 (len + offset).max(0) as usize
6912 } else {
6913 (offset as usize).min(chars.len())
6914 };
6915 // length sentinels:
6916 // i64::MIN → no length given, take rest of string
6917 // negative → "stop N chars before end" (bash/zsh)
6918 // positive → take exactly N chars
6919 let take = if length == i64::MIN {
6920 chars.len().saturating_sub(start)
6921 } else if length < 0 {
6922 // Stop |length| chars before end.
6923 let end = (len + length).max(start as i64) as usize;
6924 end.saturating_sub(start)
6925 } else {
6926 (length as usize).min(chars.len().saturating_sub(start))
6927 };
6928 let result: String = chars.iter().skip(start).take(take).collect();
6929 fusevm::Value::str(result)
6930 });
6931
6932 // `${var:offset[:length]}` with arith/var-based offset/length —
6933 // the literal-int variant above can't represent `${s:$n:2}`.
6934 // Stack layout (top→bottom): has_length, length_expr, offset_expr,
6935 // name. has_length distinguishes "no length given" from
6936 // "length=0".
6937 vm.register_builtin(BUILTIN_PARAM_SUBSTRING_EXPR, |vm, _argc| {
6938 let has_len = vm.pop().to_int() != 0;
6939 let len_expr = vm.pop().to_str();
6940 let off_expr = vm.pop().to_str();
6941 let name = vm.pop().to_str();
6942 // Match BUILTIN_PARAM_SUBSTRING's array-aware dispatch:
6943 // `${@:n:m}` / `${arr[@]:n:m}` slice positionals/array
6944 // ELEMENTS, not chars. Without this, the expr-form fell
6945 // back to scalar char-slicing on the IFS-joined value.
6946 let (lookup_name, force_array) = if let Some(stripped) = name
6947 .strip_suffix("[@]")
6948 .or_else(|| name.strip_suffix("[*]"))
6949 {
6950 (stripped.to_string(), true)
6951 } else {
6952 (name.clone(), false)
6953 };
6954 // Use a dual-result: Array when force_array, Str otherwise.
6955 // zsh: `${a[@]:1}` keeps array splice for downstream array
6956 // assignment (`b=("${a[@]:1}")` should give 2 elements, not
6957 // a single space-joined string).
6958 enum Result {
6959 Str(String),
6960 Arr(Vec<String>),
6961 }
6962 let result = with_executor(|exec| {
6963 let offset = crate::ported::math::mathevali(&crate::ported::subst::singsub(&off_expr)).unwrap_or(0);
6964 let length_opt: Option<i64> = if has_len {
6965 Some(crate::ported::math::mathevali(&crate::ported::subst::singsub(&len_expr)).unwrap_or(0))
6966 } else {
6967 None
6968 };
6969 // Positional-param slice (`${@:1:2}`).
6970 if lookup_name == "@" || lookup_name == "*" {
6971 let parts = slice_positionals(exec, offset, length_opt.unwrap_or(i64::MIN));
6972 return Result::Arr(parts);
6973 }
6974 // Array slice (`${arr:1:2}` or `${arr[@]:1:2}`).
6975 if let Some(arr) = exec.array(&lookup_name) {
6976 let sliced = slice_array_zero_based(&arr, offset, length_opt.unwrap_or(i64::MIN));
6977 return if force_array {
6978 Result::Arr(sliced)
6979 } else {
6980 Result::Str(sliced.join(" "))
6981 };
6982 }
6983 // Scalar fallback.
6984 let val = exec.get_variable(&lookup_name);
6985 let chars: Vec<char> = val.chars().collect();
6986 let len = chars.len() as i64;
6987 let start = if offset < 0 {
6988 (len + offset).max(0) as usize
6989 } else {
6990 (offset as usize).min(chars.len())
6991 };
6992 let take = match length_opt {
6993 None => chars.len().saturating_sub(start),
6994 Some(length) if length < 0 => chars.len().saturating_sub(start),
6995 Some(length) => (length as usize).min(chars.len().saturating_sub(start)),
6996 };
6997 Result::Str(chars.iter().skip(start).take(take).collect::<String>())
6998 });
6999 match result {
7000 Result::Str(s) => fusevm::Value::str(s),
7001 Result::Arr(parts) => {
7002 fusevm::Value::Array(parts.into_iter().map(fusevm::Value::str).collect())
7003 }
7004 }
7005 });
7006
// BUILTIN_PARAM_STRIP — `${var#pat}` / `${var##pat}` / `${var%pat}` /
// `${var%%pat}`.
//
// Pops four operands, top first: dq_flag, op_byte, pattern, name.
// op: 0=`#` short-prefix, 1=`##` long, 2=`%` short-suffix, 3=`%%` long.
// Matching is done with `crate::exec::glob_match_static`.
//
// Returns Value::Str for scalars and for double-quoted array forms,
// Value::Array (one stripped string per element) for unquoted
// array / positional-parameter forms.
vm.register_builtin(BUILTIN_PARAM_STRIP, |vm, _argc| {
    // The compiler passes `dq_flag` as a 4th arg so the
    // runtime can distinguish DQ-wrapped (join-then-strip)
    // from unquoted (per-element) on array-valued names.
    // Mirrors zsh's pattern.c split between `getmatch` (joined
    // scalar) and `getmatcharr` (per-element).
    let dq_flag = vm.pop().to_int() != 0;
    let op = vm.pop().to_int() as u8;
    let pattern_raw = vm.pop().to_str();
    let name = vm.pop().to_str();
    // SUB_M / SUB_S flags. M = return matched portion (vs strip
    // result). S = search anywhere instead of anchored to start
    // (#/##) or end (%/%%). Direct port of subst.c:2171/2186
    // SUB_MATCH / SUB_SUBSTR bits + getmatch dispatch. Bit 0x0008 is
    // read as (M) and bit 0x0004 as (S); the stored flags are reset to
    // 0 right after reading, so they apply to this expansion only.
    let (sub_match, sub_substr) = {
        let sf = crate::ported::subst::sub_flags_get();
        let m = (sf & 0x0008) != 0;
        let s = (sf & 0x0004) != 0;
        crate::ported::subst::sub_flags_set(0);
        (m, s)
    };
    // Pattern may contain `$var` / `$(cmd)` / `$((expr))` — zsh
    // expands these before applying the strip.
    let pattern = crate::ported::subst::singsub(&pattern_raw);
    // strip_one — inline port of subst.c:3540's SUB_MATCH dispatch on
    // the # / ## / % / %% pattern strip ops, applied to one string.
    // The (M) / (S) behaviour comes from the `sub_match` / `sub_substr`
    // values captured above. Op codes per ParamModifierKind::Strip:
    //   0 = `#`  shortest prefix
    //   1 = `##` longest prefix
    //   2 = `%`  shortest suffix
    //   3 = `%%` longest suffix
    // Works on a Vec<char> so that positions are characters, not bytes.
    let strip_one = |v: &str, op: u8, pattern: &str| -> String {
        let chars: Vec<char> = v.chars().collect();
        let n = chars.len();
        // (S) substring search: instead of anchoring to start
        // (#/##) or end (%/%%), find a match ANYWHERE in v, and
        // either return it (sub_match) or remove it (default —
        // keep parts before+after the match).
        // Direct port of subst.c:2186 SUB_SUBSTR bit which
        // getmatch routes through pat_substr_match.
        if sub_substr { // c:2186
            let longest = matches!(op, 1 | 3); // c:2186 (## / %% want longest)
            let mut best: Option<(usize, usize)> = None; // c:2186 (start, end in chars)
            // Slide a window across v; for each start index the end
            // indices are tried longest-first or shortest-first.
            for start in 0..=n { // c:2186
                let end_iter: Box<dyn Iterator<Item = usize>> = if longest { // c:2186
                    Box::new((start..=n).rev()) // c:2186
                } else { // c:2186
                    Box::new(start..=n) // c:2186
                }; // c:2186
                for end in end_iter { // c:2186
                    let sub: String = chars[start..end].iter().collect(); // c:2186
                    if crate::exec::glob_match_static(&sub, pattern) { // c:2186
                        // # / ## keep the FIRST match found and stop
                        // scanning. % / %% never stop early: every
                        // later match overwrites `best`, so the match
                        // visited last in this L-to-R scan wins.
                        let suffix_op = matches!(op, 2 | 3); // c:2186
                        if best.is_none() || suffix_op { // c:2186
                            best = Some((start, end)); // c:2186
                        } // c:2186
                        if !suffix_op { break; } // c:2186 (#/## stop at first)
                    } // c:2186
                } // c:2186
                if best.is_some() && !matches!(op, 2 | 3) { break; } // c:2186
            } // c:2186
            if let Some((s, e)) = best { // c:2186
                let matched: String = chars[s..e].iter().collect(); // c:2186
                if sub_match { // c:2171
                    return matched; // c:2171
                } // c:2171
                let mut out = String::new(); // c:2186
                out.extend(chars[..s].iter()); // c:2186
                out.extend(chars[e..].iter()); // c:2186
                return out; // c:2186
            } // c:2186
            // No match anywhere: (M) yields empty, otherwise v unchanged.
            return if sub_match { String::new() } else { v.to_string() }; // c:2186
        } // c:2186
        // Anchored forms. When sub_match (M) is set, each arm
        // returns the MATCHED portion instead of the post-strip
        // string. Used by zsh idioms like ${(M)path#*/} which
        // returns the leading "/segment" rather than the rest.
        // Direct port of getmatch's SUB_MATCH branch — it picks
        // the matched-portion view from the same scan. With no
        // match, (M) yields empty and the plain form yields v.
        match op {
            0 => {
                // shortest prefix strip — try k = 0, 1, ...
                for k in 0..=n {
                    let prefix: String = chars[..k].iter().collect();
                    if crate::exec::glob_match_static(&prefix, pattern) {
                        return if sub_match { // c:2171
                            prefix // c:2171
                        } else { // c:2171
                            chars[k..].iter().collect()
                        };
                    }
                }
                if sub_match { String::new() } else { v.to_string() } // c:2171
            }
            1 => {
                // longest prefix strip — try k = n down to 0
                for k in (0..=n).rev() {
                    let prefix: String = chars[..k].iter().collect();
                    if crate::exec::glob_match_static(&prefix, pattern) {
                        return if sub_match { // c:2171
                            prefix // c:2171
                        } else { // c:2171
                            chars[k..].iter().collect()
                        };
                    }
                }
                if sub_match { String::new() } else { v.to_string() } // c:2171
            }
            2 => {
                // shortest suffix strip — suffix length k = 0, 1, ...
                for k in 0..=n {
                    let suffix: String = chars[n - k..].iter().collect();
                    if crate::exec::glob_match_static(&suffix, pattern) {
                        return if sub_match { // c:2171
                            suffix // c:2171
                        } else { // c:2171
                            chars[..n - k].iter().collect()
                        };
                    }
                }
                if sub_match { String::new() } else { v.to_string() } // c:2171
            }
            3 => {
                // longest suffix strip — suffix length k = n down to 0
                for k in (0..=n).rev() {
                    let suffix: String = chars[n - k..].iter().collect();
                    if crate::exec::glob_match_static(&suffix, pattern) {
                        return if sub_match { // c:2171
                            suffix // c:2171
                        } else { // c:2171
                            chars[..n - k].iter().collect()
                        };
                    }
                }
                if sub_match { String::new() } else { v.to_string() } // c:2171
            }
            // Unknown op byte: leave the value untouched.
            _ => v.to_string(),
        }
    };
    // `${arr#pat}` / `${arr%pat}` / etc. on an array:
    // - Unquoted form: iterate per element, preserve array
    //   shape so `print -l` emits one line per element. Direct
    //   port of Src/subst.c:3422-3433 `if (!vunset && isarr)`
    //   branch which calls `getmatcharr(&aval, …)` — modifies
    //   each element of the array in-place, leaves isarr=1.
    // - DQ-wrapped form (`"${arr%pat}"`): zsh joins as scalar
    //   first then strips. So `(/tmp/foo /etc/bar)` with `%/*`
    //   gives `/tmp/foo /etc` (last `/bar` stripped from
    //   joined), not `/tmp /etc` (per-element).
    enum StripResult {
        Scalar(String),
        Array(Vec<String>),
    }
    let result: StripResult = with_executor(|exec| {
        // Quoted if either the compiler said so or the executor is
        // currently inside a double-quoted context.
        let in_dq = dq_flag || exec.in_dq_context > 0;
        // `$@` / `$*`: positional parameters.
        if name == "@" || name == "*" {
            if in_dq {
                let joined = exec.pparams().join(" ");
                return StripResult::Scalar(strip_one(&joined, op, &pattern));
            }
            let stripped: Vec<String> = exec
                .pparams()
                .iter()
                .map(|e| strip_one(e, op, &pattern))
                .collect();
            return StripResult::Array(stripped);
        }
        // Named array.
        if let Some(arr) = exec.array(&name) {
            if in_dq {
                let joined = arr.join(" ");
                return StripResult::Scalar(strip_one(&joined, op, &pattern));
            }
            let stripped: Vec<String> = arr
                .iter()
                .map(|e| strip_one(e, op, &pattern))
                .collect();
            return StripResult::Array(stripped);
        }
        // Plain scalar (or anything else get_variable resolves).
        let val = exec.get_variable(&name);
        StripResult::Scalar(strip_one(&val, op, &pattern))
    });
    match result {
        StripResult::Scalar(s) => fusevm::Value::str(s),
        StripResult::Array(arr) => {
            let mapped: Vec<fusevm::Value> = arr.into_iter().map(fusevm::Value::str).collect();
            fusevm::Value::Array(mapped)
        }
    }
});
7218
7219 // `$((expr))` — pops [expr_string], evaluates via MathEval which
7220 // honors integer-vs-float distinction (zsh-compatible). Returns
7221 // the result as Value::Str so it can be Concat'd into surrounding
7222 // word context.
7223 vm.register_builtin(BUILTIN_ARITH_EVAL, |vm, _argc| {
7224 let expr = vm.pop().to_str();
7225 let result = crate::ported::subst::arithsubst(&expr, "", "");
7226 fusevm::Value::str(result)
7227 });
7228
7229 // `$(cmd)` — pops [cmd_string], routes through
7230 // run_command_substitution which performs an in-process pipe-capture.
7231 // Avoids the Op::CmdSubst sub-chunk word-emit bug
7232 // (`printf "a\nb"` produced "anb" via that path). Returns trimmed
7233 // output (trailing newlines stripped per POSIX cmd-sub semantics).
7234 vm.register_builtin(BUILTIN_CMD_SUBST_TEXT, |vm, _argc| {
7235 let cmd = vm.pop().to_str();
7236 // Inherit live $? into the inner shell so cmd-subst sees the
7237 // parent's most recent exit. Same rationale as the mode-3
7238 // backtick path above.
7239 let live_status = vm.last_status;
7240 let result = with_executor(|exec| {
7241 exec.set_last_status(live_status);
7242 exec.run_command_substitution(&cmd)
7243 });
7244 // Mirror run_command_substitution's exec.last_status side
7245 // effect into the VM's live counter so a containing
7246 // assignment's BUILTIN_SET_VAR — which reads vm.last_status
7247 // — sees the cmd-subst's exit. Without this, `a=$(false);
7248 // echo $?` reads stale 0 (vm.last_status was zeroed by
7249 // compile_assign's prelude SetStatus, and run_cmd_subst only
7250 // updated exec.last_status). Pull the value back through
7251 // exec since it owns the canonical post-subst record.
7252 let cs_status = with_executor(|exec| exec.last_status());
7253 vm.last_status = cs_status;
7254 fusevm::Value::str(result)
7255 });
7256
7257 // Text-based word expansion. Pops [preserved_text, mode_byte].
7258 // mode_byte:
7259 // 0 = Default — expand_string + xpandbraces + expand_glob
7260 // 1 = DoubleQuoted — strip outer `"…"`, expand_string only
7261 // (no brace, no glob — DQ semantics)
7262 // 2 = SingleQuoted — strip outer `'…'`, no expansion
7263 // (kept for symmetry; Snull early-return covers most SQ)
7264 // 3 = AltBackquote — strip backticks, run as cmd-sub
7265 // Single result → Value::str; multi → Value::Array.
7266 vm.register_builtin(BUILTIN_EXPAND_TEXT, |vm, _argc| {
7267 let mode = vm.pop().to_int() as u8;
7268 let text = vm.pop().to_str();
7269 // Sync vm.last_status → exec.last_status so cmd-subst (mode 3)
7270 // and any nested $? reads inside singsub see the live `$?`
7271 // from the most recent VM op. Without this, cmd-subst inside
7272 // arg-eval saw a stale exec.last_status that was zeroed at
7273 // the start of the current statement. Direct port of zsh's
7274 // pre-cmdsubst lastval propagation per Src/exec.c:4770.
7275 let live_status = vm.last_status;
7276 with_executor(|exec| match mode {
7277 // Mode 1 = DoubleQuoted (argument context).
7278 // Mode 5 = DoubleQuoted in scalar-assignment context.
7279 // Both share the same DQ unescape pre-processing; mode 5
7280 // additionally bumps `in_scalar_assign` so subst_port's
7281 // paramsubst sees ssub=true and suppresses split flags
7282 // `(f)` / `(s:STR:)` / `(0)` per Src/subst.c:1759 +
7283 // Src/exec.c::addvars line 2546 (the PREFORK_SINGLE bit
7284 // C zsh sets when prefork-ing the assignment RHS).
7285 1 | 5 => {
7286 // DoubleQuoted: strip outer `"…"` if present. In DQ
7287 // context, `\` escapes the DQ-special chars `$`, `` ` ``,
7288 // `"`, `\`. zsh's expand_string expects the lexer's
7289 // `\0X` literal-marker for an already-escaped char, so
7290 // we pre-process: `\$` → `\0$`, `\\` → `\0\`, etc. Then
7291 // expand_string handles the rest.
7292 let inner = if text.len() >= 2 && text.starts_with('"') && text.ends_with('"') {
7293 &text[1..text.len() - 1]
7294 } else {
7295 text.as_str()
7296 };
7297 let mut prepped = String::with_capacity(inner.len());
7298 let mut chars = inner.chars().peekable();
7299 while let Some(c) = chars.next() {
7300 if c == '\\' {
7301 match chars.peek() {
7302 Some('$') | Some('`') | Some('"') | Some('\\') => {
7303 prepped.push('\x00');
7304 prepped.push(chars.next().unwrap());
7305 }
7306 _ => prepped.push(c),
7307 }
7308 } else {
7309 prepped.push(c);
7310 }
7311 }
7312 // Tell parameter-flag application that we're inside
7313 // double quotes — array-only flags ((o), (O), (n),
7314 // (i), (M), (u)) must be no-ops here per zsh.
7315 exec.in_dq_context += 1;
7316 if mode == 5 {
7317 exec.in_scalar_assign += 1;
7318 }
7319 let out = crate::ported::subst::singsub(&prepped);
7320 if mode == 5 {
7321 exec.in_scalar_assign -= 1;
7322 }
7323 exec.in_dq_context -= 1;
7324 fusevm::Value::str(out)
7325 }
7326 2 => {
7327 // SingleQuoted: pure literal, strip outer `'…'`.
7328 let inner = if text.len() >= 2 && text.starts_with('\'') && text.ends_with('\'') {
7329 &text[1..text.len() - 1]
7330 } else {
7331 text.as_str()
7332 };
7333 fusevm::Value::str(inner.to_string())
7334 }
7335 3 => {
7336 // Backquote command sub: strip outer backticks.
7337 // Word-split the result on IFS when the surrounding
7338 // word is unquoted — zsh: `print -l \`echo a b c\``
7339 // emits one arg per word. The $(…) path applies the
7340 // same split via BUILTIN_WORD_SPLIT after capture; do
7341 // the equivalent here for the `…` form.
7342 let inner = if text.len() >= 2 && text.starts_with('`') && text.ends_with('`') {
7343 &text[1..text.len() - 1]
7344 } else {
7345 text.as_str()
7346 };
7347 // Apply the live VM status before running the inner
7348 // shell so the inherited $? matches zsh's lastval
7349 // propagation.
7350 exec.set_last_status(live_status);
7351 let captured = exec.run_command_substitution(inner);
7352 let trimmed = captured.trim_end_matches('\n');
7353 if exec.in_dq_context > 0 {
7354 fusevm::Value::str(trimmed.to_string())
7355 } else {
7356 let ifs = exec
7357 .scalar("IFS")
7358 .unwrap_or_else(|| " \t\n".to_string());
7359 let parts: Vec<fusevm::Value> = trimmed
7360 .split(|c: char| ifs.contains(c))
7361 .filter(|s| !s.is_empty())
7362 .map(|s| fusevm::Value::str(s.to_string()))
7363 .collect();
7364 if parts.is_empty() {
7365 fusevm::Value::str(String::new())
7366 } else if parts.len() == 1 {
7367 parts.into_iter().next().unwrap()
7368 } else {
7369 fusevm::Value::Array(parts)
7370 }
7371 }
7372 }
7373 4 => {
7374 // HeredocBody: expand variables / command-subst / arith
7375 // but NOT glob or brace. Heredoc lines like `[42]` must
7376 // pass through verbatim — running them through the
7377 // default pipeline triggers NOMATCH on the literal.
7378 fusevm::Value::str(crate::ported::subst::singsub(&text))
7379 }
7380 _ => {
7381 // Default: full expansion pipeline.
7382 // Pre-process backslash-escapes to the `\x00X` literal-
7383 // marker form so expand_string suppresses variable
7384 // expansion on escaped specials: `\$` → literal `$`,
7385 // `\\` → literal `\`, `\`` → literal `` ` ``. Without
7386 // this, `echo \$a` ran `\` literally then expanded
7387 // `$a`, leaving a stray `\` that echo's escape
7388 // interpreter then turned into form-feed when followed
7389 // by `f`-like content.
7390 let mut prepped = String::with_capacity(text.len());
7391 let mut it = text.chars().peekable();
7392 while let Some(c) = it.next() {
7393 if c == '\\' {
7394 match it.peek() {
7395 Some('$') | Some('`') | Some('"') | Some('\'') | Some('\\') => {
7396 prepped.push('\x00');
7397 prepped.push(it.next().unwrap());
7398 }
7399 // Don't preprocess `\{` / `\}` here — the
7400 // brace-expansion stage has its own
7401 // has_balanced_escaped_braces detector that
7402 // strips the backslashes when both sides
7403 // are escaped. Touching them here would
7404 // hide them from that detector.
7405 _ => prepped.push(c),
7406 }
7407 } else {
7408 prepped.push(c);
7409 }
7410 }
7411 let expanded = crate::ported::subst::singsub(&prepped);
7412 let brace_expanded = vec![expanded.to_string()];
7413 // zsh stores the option as `glob` (default ON);
7414 // `setopt noglob` writes `glob=false`. Honor either
7415 // form so the dispatcher behaves the same as zsh.
7416 let noglob = crate::ported::options::opt_state_get("noglob").unwrap_or(false)
7417 || crate::ported::options::opt_state_get("GLOB").map(|v| !v).unwrap_or(false)
7418 || !crate::ported::options::opt_state_get("glob").unwrap_or(true);
7419 let parts: Vec<String> = brace_expanded
7420 .into_iter()
7421 .flat_map(|s| {
7422 // The lexer leaves glob metacharacters in their
7423 // META-encoded form: `*` → `\u{87}`, `?` →
7424 // `\u{86}`, `[` → `\u{91}`, etc. expand_string
7425 // doesn't untokenize them, so the literal-char
7426 // checks below (`s.contains('*')`) would miss
7427 // every real glob and skip expand_glob — that
7428 // bug let `echo *.toml` print the literal
7429 // `*.toml` because the META `\u{87}` never
7430 // matched the literal `*`. Untokenize once so
7431 // the metacharacter checks see the canonical
7432 // form. zsh's pattern.c expects `*` etc. as
7433 // bare chars at the glob layer.
7434 let s = crate::lex::untokenize(&s);
7435 // Skip glob expansion for assignment-shaped
7436 // words (`NAME=value`). zsh doesn't expand the
7437 // RHS of an assignment as a path glob unless
7438 // `setopt globassign` is set, and feeding such
7439 // words through expand_glob makes NOMATCH
7440 // (default ON) fire spuriously on
7441 // `integer i=2*3+1`, `path=*.rs`, etc.
7442 let is_assignment_shape = {
7443 let bytes = s.as_bytes();
7444 let mut i = 0;
7445 if !bytes.is_empty()
7446 && (bytes[0] == b'_' || bytes[0].is_ascii_alphabetic())
7447 {
7448 i += 1;
7449 while i < bytes.len()
7450 && (bytes[i] == b'_' || bytes[i].is_ascii_alphanumeric())
7451 {
7452 i += 1;
7453 }
7454 i < bytes.len() && bytes[i] == b'='
7455 } else {
7456 false
7457 }
7458 };
7459 // Also trigger expand_glob when the word ends
7460 // with a `(...)` qualifier suffix even without
7461 // any other glob metachar — `/etc/hosts(mh-100)`,
7462 // `path(.)`, etc.
7463 let has_qual_suffix =
7464 s.ends_with(')') && s.contains('(') && !s.contains('|');
7465 // extendedglob `^pat` (negation) and `pat~excl`
7466 // (exclusion). Trigger expand_glob so the runtime
7467 // can apply the appropriate filter. Both require
7468 // `setopt extendedglob` — runtime falls through
7469 // to literal if that's off.
7470 let extglob_meta =
7471 crate::ported::options::opt_state_get("extendedglob").unwrap_or(false)
7472 && (s.starts_with('^') || s.contains('~') || s.contains("/^"));
7473 let has_numeric_range = s.contains('<')
7474 && s.contains('>')
7475 && !crate::ported::pattern::extract_numeric_ranges(&s).is_empty();
7476 // Glob alternation `(a|b|c)` is a primary
7477 // zsh feature — `/etc/(passwd|hostname)`
7478 // should expand to file matches. Detected
7479 // by `(` ... `|` ... `)` shape; the actual
7480 // top-level-vs-nested check happens in
7481 // expand_glob_alternation.
7482 let has_alternation = s.contains('(') && s.contains('|') && s.contains(')');
7483 if !noglob
7484 && !is_assignment_shape
7485 && (s.contains('*')
7486 || s.contains('?')
7487 || s.contains('[')
7488 || has_qual_suffix
7489 || extglob_meta
7490 || has_numeric_range
7491 || has_alternation)
7492 {
7493 exec.expand_glob(&s)
7494 } else {
7495 vec![s]
7496 }
7497 })
7498 .collect();
7499 if parts.len() == 1 {
7500 let only = parts.into_iter().next().unwrap_or_default();
7501 // Empty unquoted expansion → drop the arg entirely
7502 // (zsh "remove empty unquoted words" rule). Returning
7503 // an empty Value::Array makes pop_args contribute zero
7504 // items. Direct port of subst.c's empty-elide pass at
7505 // the end of multsub which removes empty linknodes
7506 // from unquoted contexts. Quoted DQ/SQ paths (modes
7507 // 1/2/5) take separate arms above and always emit
7508 // Value::Str so the empty arg survives.
7509 if only.is_empty() {
7510 fusevm::Value::Array(Vec::new())
7511 } else {
7512 fusevm::Value::str(only)
7513 }
7514 } else {
7515 fusevm::Value::Array(parts.into_iter().map(fusevm::Value::str).collect())
7516 }
7517 }
7518 })
7519 });
7520
7521 // `${#name}` — pops [name]. Returns the value's element count for
7522 // arrays (indexed and assoc) or character length for scalars.
7523 vm.register_builtin(BUILTIN_PARAM_LENGTH, |vm, _argc| {
7524 let name_raw = vm.pop().to_str();
7525 // Strip `[@]` / `[*]` subscript suffix — `${#arr[@]}` and
7526 // `${#m[@]}` are element-count forms, same as `${#arr}` /
7527 // `${#m}`. Fast paths sometimes hand us the bare name and
7528 // sometimes leave the subscript attached.
7529 let name = name_raw
7530 .strip_suffix("[@]")
7531 .or_else(|| name_raw.strip_suffix("[*]"))
7532 .unwrap_or(&name_raw)
7533 .to_string();
7534 // `${#arr[N]}` — length of the Nth ELEMENT, not the array
7535 // count. Verified empirically: arr=(aa bb ccc); ${#arr[2]} → 2
7536 // in real zsh. Resolve the bare name + bracketed subscript
7537 // (with embedded `$VAR` references expanded) to a single
7538 // value, then count its chars. Skip `[@]` / `[*]` — those
7539 // were stripped above as splice forms.
7540 if let Some(open) = name.find('[') {
7541 if name.ends_with(']') && &name[open..] != "[@]" && &name[open..] != "[*]" {
7542 let bare = &name[..open];
7543 let raw_idx = &name[open + 1..name.len() - 1];
7544 let elem = with_executor(|exec| {
7545 // Expand `$VAR` / `${VAR}` references inside the
7546 // subscript before lookup (single dollar pass).
7547 let resolved_idx = expand_dollar_refs(raw_idx, exec);
7548 if let Some(arr) = exec.array(bare) {
7549 if let Ok(n) = resolved_idx.trim().parse::<i64>() {
7550 let len = arr.len() as i64;
7551 let idx = if n > 0 { n - 1 } else if n < 0 { len + n } else { -1 };
7552 if idx >= 0 && (idx as usize) < arr.len() {
7553 return arr[idx as usize].clone();
7554 }
7555 }
7556 return String::new();
7557 }
7558 if let Some(map) = exec.assoc(bare) {
7559 return map.get(resolved_idx.as_str()).cloned().unwrap_or_default();
7560 }
7561 String::new()
7562 });
7563 return fusevm::Value::str(elem.chars().count().to_string());
7564 }
7565 }
7566 let count = with_executor(|exec| {
7567 // ${#@} / ${#*} → count of positional params (= $#).
7568 // Without this, `@`/`*` fell through to `get_variable`
7569 // which returned the IFS-joined positional string and
7570 // we counted chars (5 for "a b c" instead of 3).
7571 if name == "@" || name == "*" || name == "argv" {
7572 return exec.pparams().len();
7573 }
7574 // Magic-array specials whose length is data-driven, not
7575 // taken from `exec.arrays`/`exec.assoc_arrays`. Direct
7576 // ports of the relevant `SPECIALPMDEF` entries:
7577 // - `errnos` → Src/Modules/system.c:902
7578 // - `commands` → Src/Modules/parameter.c
7579 // - `aliases` → Src/Modules/parameter.c
7580 // - `functions` → Src/Modules/parameter.c
7581 // - `parameters` → Src/Modules/parameter.c
7582 // - `options` → Src/Modules/parameter.c
7583 // - `sysparams` → Src/Modules/system.c:904
7584 match name.as_str() {
7585 "errnos" => return crate::modules::system::ERRNO_NAMES.len(),
7586 "epochtime" => return 2, // [seconds, nanoseconds]
7587 "commands" => return crate::ported::hashtable::cmdnamtab_lock()
7588 .read().map(|t| t.len()).unwrap_or(0),
7589 "aliases" => return exec.alias_entries().len(),
7590 "galiases" => return exec.global_alias_entries().len(),
7591 "saliases" => return exec.suffix_alias_entries().len(),
7592 "functions" => return exec.function_names().len(),
7593 "options" => return crate::ported::options::opt_state_len(),
7594 "sysparams" => return 3, // pid, ppid, procsubstpid
7595 // Magic-assoc lengths backed by canonical scanners.
7596 // Direct ports of parameter.c SPECIALPMDEF entries —
7597 // each scan callback emits one entry per node, so the
7598 // count is the length of the scan_magic_assoc_keys
7599 // collected list.
7600 "builtins" | "dis_builtins"
7601 | "dis_functions" | "dis_aliases"
7602 | "dis_galiases" | "dis_saliases" => {
7603 return crate::exec::scan_magic_assoc_keys(&name)
7604 .map(|v| v.len())
7605 .unwrap_or(0);
7606 }
7607 _ => {}
7608 }
7609 if let Some(arr) = exec.array(&name) {
7610 arr.len()
7611 } else if let Some(assoc) = exec.assoc(&name) {
7612 assoc.len()
7613 } else {
7614 exec.get_variable(&name).chars().count()
7615 }
7616 });
7617 fusevm::Value::str(count.to_string())
7618 });
7619
// `${var/pat/repl}` / `${var//pat/repl}` / `${var/#pat/repl}` /
// `${var/%pat/repl}` — Pops [name, pattern, replacement, op_byte,
// dq_flag]; dq_flag is on top of the stack and is popped first.
// op: 0=first, 1=all, 2=anchor-prefix (`/#`), 3=anchor-suffix (`/%`).
7623 vm.register_builtin(BUILTIN_PARAM_REPLACE, |vm, _argc| {
7624 let dq_flag = vm.pop().to_int() != 0;
7625 let op = vm.pop().to_int() as u8;
7626 let repl_raw = vm.pop().to_str();
7627 let pattern_raw = vm.pop().to_str();
7628 let name = vm.pop().to_str();
7629 // SUB_* flag bits set by the (M)/(R)/(B)/(E)/(N)/(S) flag-loop
7630 // arms. Direct port of zsh's getmatch() flag dispatch — these
7631 // alter the disposition of the match result:
7632 // M=0x08 — return matched portion
7633 // R=0x10 — return rest after match
7634 // B=0x20 — return 1-based start index
7635 // E=0x40 — return 1-based end index
7636 // N=0x80 — return match length
7637 // S=0x04 — substring search (anywhere) instead of anchored
7638 // Read once and consume so subsequent paramsubst calls see
7639 // a clean slate — direct port of subst.c flag-loop pattern.
7640 let (sub_match, sub_rest, sub_bind, sub_eind, sub_len, _sub_substr) = {
7641 let f = crate::ported::subst::sub_flags_get();
7642 crate::ported::subst::sub_flags_set(0);
7643 (
7644 (f & 0x0008) != 0, // c:2171 M
7645 (f & 0x0010) != 0, // c:2174 R
7646 (f & 0x0020) != 0, // c:2177 B
7647 (f & 0x0040) != 0, // c:2180 E
7648 (f & 0x0080) != 0, // c:2183 N
7649 (f & 0x0004) != 0, // c:2186 S
7650 )
7651 };
7652 // Both pattern and replacement get parameter / cmd-subst /
7653 // arith expansion before use (zsh semantics — `${s/$pat/X}`
7654 // resolves $pat).
7655 // Untokenize before pattern compile — zsh's lexer leaves
7656 // Snull/DQ markers and meta-encoded metachars in the
7657 // pattern stream. regex::Regex::new errors on those bytes,
7658 // and even when it compiles, it matches against tokenized
7659 // text rather than the user's literal pattern. Direct port
7660 // of bin_test's `untokenize(pattern)` call before patcompile.
7661 let pattern = crate::ported::subst::singsub(&pattern_raw);
7662 let pattern = crate::lex::untokenize(&pattern);
7663 // Replacement: full singsub with skip_filesub so a literal
7664 // leading `~` in the replacement reaches the output as-is
7665 // (per zsh, `${var/#pat/~}` keeps the tilde — the
7666 // p10k / oh-my-zsh idiom of replacing `$HOME` with `~` for
7667 // display). Was using a hand-rolled `expand_no_tilde` that
7668 // only handled `$VAR` / `${VAR}` references, missing
7669 // `$(cmd)` and `$((expr))` in templates like
7670 // `\${var//foo/$(date +%s)}`.
7671 // Inline `singsub-with-skip_filesub` — C zsh sets the flag
7672 // inline before calling singsub rather than wrapping in a
7673 // helper. Direct port of the prefork SUB_FLAG | SKIP_FILESUB
7674 // pattern. PORT.md: no helpers without C counterpart.
7675 let repl = with_executor(|exec| {
7676 let saved = crate::ported::subst::SKIP_FILESUB.with(|c| c.get());
7677 crate::ported::subst::SKIP_FILESUB.with(|c| c.set(true));
7678 let r = crate::ported::subst::singsub(&repl_raw);
7679 crate::ported::subst::SKIP_FILESUB.with(|c| c.set(saved));
7680 if crate::ported::utils::errflag.load(std::sync::atomic::Ordering::Relaxed) != 0 {
7681 exec.set_last_status(1);
7682 }
7683 r
7684 });
7685 let repl = crate::lex::untokenize(&repl);
7686 // Strip backslash escapes from the pattern. zsh: `\X` in a
7687 // ${var/pat/repl} pattern means "literal X" — the backslash
7688 // is removed and X is used as a literal char (regardless of
7689 // whether X is a pattern metachar). Without this, `${a//\:/-}`
7690 // tried to match the literal "\:" in $a which never matched.
7691 // We preserve `\\` (literal backslash) and `\X` for X in the
7692 // pattern-meta set, since regex compile expects those raw.
7693 let pattern = {
7694 let mut out = String::with_capacity(pattern.len());
7695 let mut it = pattern.chars().peekable();
7696 while let Some(c) = it.next() {
7697 if c == '\\' {
7698 if let Some(&nx) = it.peek() {
7699 // For non-meta chars, drop the backslash.
7700 // For metas keep the escape so regex still
7701 // matches them literally below.
7702 // Keep escape only for actual zsh pattern
7703 // metachars (the ones that have special pattern
7704 // meaning). `.` is regex-meta but NOT zsh-meta,
7705 // so `\.` drops the backslash → literal `.`.
7706 if matches!(nx, '?' | '*' | '[' | ']' | '(' | ')' | '|' | '\\') {
7707 out.push(c);
7708 } else {
7709 out.push(nx);
7710 it.next();
7711 }
7712 } else {
7713 out.push(c);
7714 }
7715 } else {
7716 out.push(c);
7717 }
7718 }
7719 out
7720 };
7721 // Inline pattern flags `(#i)` / `(#l)` / `(#I)` / `(#b)` apply
7722 // to ${var//pat/repl}. `(#b)` enables backref capture: each
7723 // `(...)` group in the pattern becomes accessible via
7724 // `${match[N]}` (1-based) in the replacement. Per
7725 // Src/pattern.c — the C source uses `pat_pure` flags +
7726 // `pat_subme` arrays; the Rust port plumbs through
7727 // `regex::Captures` and writes `state.arrays["match"]`
7728 // before each replacement-string expansion.
7729 // Inline glob-flag pre-parse — direct call to patgetglobflags
7730 // + bit-mask extraction (matches C pattern.c:1066+ inline).
7731 let (pattern, case_insensitive_repl, _l_flag_repl, _approx_repl, backref_mode) =
7732 if let Some((bits, _assert, consumed)) =
7733 crate::ported::pattern::patgetglobflags(&pattern)
7734 {
7735 let ci = (bits & crate::ported::zsh_h::GF_IGNCASE) != 0;
7736 let l = (bits & crate::ported::zsh_h::GF_LCMATCHUC) != 0;
7737 let errs = bits & 0xff;
7738 let approx = if errs != 0 { Some(errs as u32) } else { None };
7739 let br = (bits & crate::ported::zsh_h::GF_BACKREF) != 0;
7740 (pattern[consumed..].to_string(), ci, l, approx, br)
7741 } else {
7742 (pattern.clone(), false, false, None, false)
7743 };
7744 // zsh patterns in ${var/pat/repl} support `?`, `*`, `[...]`,
7745 // anchored `#`/`%` (handled via op codes 2/3). Compile to a
7746 // regex for the actual matching; falls back to plain string
7747 // when the pattern has no glob metas (faster).
7748 // Include `(` as a glob trigger — zsh's `(...)` is a grouping
7749 // (with `|` for alternation). `${a/(?)/X}` should match like
7750 // `${a/?/X}` (paren is the group). Without `(` in the trigger
7751 // set, paren patterns fell into the literal-string path and
7752 // matched nothing.
7753 // `#` (and its `##` repetition pair) is an extendedglob
7754 // postfix metachar — `a##` = one-or-more `a`. Include it
7755 // in the trigger set so `${var//a##/X}` routes through the
7756 // regex compile path instead of the literal-string fallback.
7757 // Bare `#` alone is non-meta — but it's safe to over-trigger
7758 // here because the regex compiler escapes literals it can't
7759 // interpret as quantifier postfix anyway.
7760 let has_glob = pattern
7761 .chars()
7762 .any(|c| matches!(c, '?' | '*' | '[' | ']' | '(' | '#'));
7763 // backref_mode (set by `(#b)` / `(#m)` / `(#M)` flags) needs
7764 // per-match capture iteration so `$match[N]` / `$MATCH` /
7765 // `$MBEGIN` / `$MEND` resolve PER-replacement against the
7766 // current capture. The literal-string replace path skips
7767 // captures entirely, so MATCH stays empty. Force the regex
7768 // path when backref_mode is set even for literal patterns.
7769 let glob_re: Option<regex::Regex> = if has_glob || case_insensitive_repl || backref_mode {
7770 // Convert the glob pattern to a regex string:
7771 // ? → . (any single char)
7772 // * → .* (any seq)
7773 // [...] → kept as-is (regex char class)
7774 // ( ) → kept as regex group; | as alternation
7775 // other regex metas → escaped
7776 let mut re = String::with_capacity(pattern.len() * 2);
7777 let mut chars = pattern.chars().peekable();
7778 // `#` / `##` extendedglob postfix detector for the
7779 // BUILTIN_PARAM_REPLACE pattern compile. Matches the
7780 // same handling in subst_port::glob_to_regex_capturing
7781 // and exec.rs::glob_match_static — direct port of zsh's
7782 // pattern.c Pound/POUND2 cases. Used by zinit's
7783 // main-message-formatter pattern `[^\}]##` (one-or-
7784 // more non-`}`).
7785 let consume_postfix = |chars: &mut std::iter::Peekable<std::str::Chars>| -> Option<&'static str> {
7786 if chars.peek() == Some(&'#') {
7787 chars.next();
7788 if chars.peek() == Some(&'#') {
7789 chars.next();
7790 Some("+")
7791 } else {
7792 Some("*")
7793 }
7794 } else {
7795 None
7796 }
7797 };
7798 while let Some(c) = chars.next() {
7799 match c {
7800 '?' => {
7801 re.push('.');
7802 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7803 }
7804 '*' => {
7805 re.push_str(".*");
7806 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7807 }
7808 '[' => {
7809 // Pass through to the closing ']' (already
7810 // valid regex syntax for most char classes).
7811 // zsh uses BOTH `[!...]` and `[^...]` for class
7812 // negation; regex only accepts `^`. Translate
7813 // a leading `!` after `[` to `^`. Track escape
7814 // state so `[\]…]` (escaped `]` inside class)
7815 // doesn't terminate the class on the FIRST `]`.
7816 // Direct port of zsh's pattern.c P_BRACT_END:
7817 // a backslash-quoted `]` inside a class stays
7818 // literal. Used by hist-substring's
7819 // `[\][()|\\*?#<>~^]` pattern.
7820 re.push('[');
7821 if chars.peek() == Some(&'!') {
7822 chars.next();
7823 re.push('^');
7824 }
7825 // First-char `]` is literal in zsh and regex
7826 // (POSIX rule), so allow it without closing.
7827 let mut first = true;
7828 let mut escaped = false;
7829 while let Some(cc) = chars.next() {
7830 if escaped {
7831 re.push(cc);
7832 escaped = false;
7833 first = false;
7834 continue;
7835 }
7836 if cc == '\\' {
7837 re.push(cc);
7838 escaped = true;
7839 continue;
7840 }
7841 if cc == ']' && !first {
7842 re.push(cc);
7843 break;
7844 }
7845 re.push(cc);
7846 first = false;
7847 }
7848 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7849 }
7850 '\\' => {
7851 // `\\(#e)` / `\\(#s)` — escaped backslash
7852 // followed by end/start anchor. After
7853 // expand_string's `\x00\` preprocessing,
7854 // this arrives as `\(#e)` (one backslash
7855 // already consumed as escape-marker). Per
7856 // zsh's pattern.c, `\\` in a pattern is
7857 // escape-backslash (literal `\`). When that
7858 // literal `\` is followed by `(#e)` /
7859 // `(#s)`, emit `\\$` / `\\^`. Detected
7860 // here as 5-char `\(#e)` (one `\` then
7861 // `(#e)` which the (#e) arm below would
7862 // otherwise treat as anchor with a literal
7863 // `(` — losing the backslash). Used by
7864 // zinit's `(#b)((*)\\(#e)|(*))`.
7865 let mut peek = chars.clone();
7866 let p1 = peek.next();
7867 let p2 = peek.next();
7868 let p3 = peek.next();
7869 let p4 = peek.next();
7870 if p1 == Some('(')
7871 && p2 == Some('#')
7872 && (p3 == Some('e') || p3 == Some('s'))
7873 && p4 == Some(')')
7874 {
7875 re.push_str("\\\\");
7876 chars.next(); chars.next(); chars.next(); chars.next();
7877 re.push(if p3 == Some('e') { '$' } else { '^' });
7878 continue;
7879 }
7880 re.push('\\');
7881 if let Some(next) = chars.next() {
7882 re.push(next);
7883 }
7884 }
7885 // `(#e)` / `(#s)` end/start anchors — direct port
7886 // of zsh's pattern.c P_EOL / P_BOL tokens. 4-char
7887 // lookahead detects them; emit regex `$` / `^`.
7888 // Used by zinit's
7889 // `(#b)((*)\\(#e)|(*))` array-replace pattern.
7890 '(' if {
7891 let mut peek = chars.clone();
7892 let p1 = peek.next();
7893 let p2 = peek.next();
7894 let p3 = peek.next();
7895 p1 == Some('#')
7896 && (p2 == Some('e') || p2 == Some('s'))
7897 && p3 == Some(')')
7898 } =>
7899 {
7900 chars.next(); // consume '#'
7901 let kind = chars.next().unwrap(); // 'e' or 's'
7902 chars.next(); // consume ')'
7903 re.push(if kind == 'e' { '$' } else { '^' });
7904 }
7905 // `(`, `|` are zsh group/alternation operators
7906 // — keep them as regex equivalents. `)` may be
7907 // followed by `#`/`##` postfix applied to the
7908 // closed group (e.g. `(foo|bar)##` = one-or-more
7909 // of foo/bar).
7910 '(' | '|' => re.push(c),
7911 ')' => {
7912 re.push(c);
7913 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7914 }
7915 // Regex meta chars that are NOT glob metas — escape
7916 // so the regex compiler treats them literally.
7917 '.' | '+' | '^' | '$' | '{' | '}' => {
7918 re.push('\\');
7919 re.push(c);
7920 }
7921 _ => {
7922 re.push(c);
7923 if let Some(q) = consume_postfix(&mut chars) { re.push_str(q); }
7924 }
7925 }
7926 }
7927 // Apply `(#i)` case-insensitive flag if it was present
7928 // in the original pattern. Same `(?i)` prefix as
7929 // glob_match_static uses.
7930 let final_re = if case_insensitive_repl {
7931 format!("(?i){}", re)
7932 } else {
7933 re
7934 };
7935 regex::Regex::new(&final_re).ok()
7936 } else {
7937 None
7938 };
7939 let one = |val: String| -> String {
7940 // SUB_M/R/B/E/N short-circuit — alter the disposition
7941 // before doing the actual replacement. Direct port of
7942 // zsh's getmatch() which returns one of these views
7943 // instead of the substituted string when the bit is set.
7944 // Matched-portion / rest / position / length variants
7945 // all skip the replacement template entirely.
7946 let any_disposition = sub_match || sub_rest || sub_bind || sub_eind || sub_len;
7947 if any_disposition {
7948 if let Some(ref rx) = glob_re {
7949 if let Some(m) = rx.find(&val) {
7950 if sub_match { return m.as_str().to_string(); }
7951 if sub_rest { return val[m.end()..].to_string(); }
7952 if sub_bind { return (m.start() + 1).to_string(); }
7953 if sub_eind { return m.end().to_string(); }
7954 if sub_len { return (m.end() - m.start()).to_string(); }
7955 } else {
7956 // No match: M/R return empty, B/E/N return 0.
7957 if sub_match || sub_rest { return String::new(); }
7958 return "0".to_string();
7959 }
7960 } else if let Some(pos) = val.find(pattern.as_str()) {
7961 let end = pos + pattern.len();
7962 if sub_match { return pattern.clone(); }
7963 if sub_rest { return val[end..].to_string(); }
7964 if sub_bind { return (pos + 1).to_string(); }
7965 if sub_eind { return end.to_string(); }
7966 if sub_len { return pattern.len().to_string(); }
7967 } else {
7968 if sub_match || sub_rest { return String::new(); }
7969 return "0".to_string();
7970 }
7971 }
7972 if let Some(ref rx) = glob_re {
7973 // Helper that runs ONE replacement: takes the
7974 // captures, populates `state.arrays["match"]`
7975 // (1-based indexing), then expands the replacement
7976 // template via `expand_string` so `$match[N]` in
7977 // the template resolves to the just-captured group.
7978 // Mirrors C zsh's pat_subme + addbackref handling
7979 // around Src/pattern.c (pattry, patmatch).
7980 let expand_repl_with_caps = |caps: ®ex::Captures| -> String {
7981 if backref_mode {
7982 with_executor(|exec| {
7983 // `(#b)` — per-group captures into `match[N]`
7984 // (1-based array). Also seed `MATCH` with the
7985 // whole-match text so `(#m)` plus `$MATCH` in
7986 // the replacement returns the matched portion.
7987 // Direct port of Src/pattern.c addbackref +
7988 // pat_pure_m which sets both views.
7989 let mut arr = Vec::with_capacity(caps.len());
7990 let mut begins = Vec::with_capacity(caps.len());
7991 let mut ends = Vec::with_capacity(caps.len());
7992 for i in 1..caps.len() {
7993 if let Some(m) = caps.get(i) {
7994 arr.push(m.as_str().to_string());
7995 begins.push((m.start() + 1).to_string());
7996 ends.push(m.end().to_string());
7997 } else {
7998 arr.push(String::new());
7999 begins.push("0".to_string());
8000 ends.push("0".to_string());
8001 }
8002 }
8003 exec.set_array("match".to_string(), arr);
8004 // mbegin/mend arrays — 1-based start
8005 // and end positions of each capture
8006 // group. Direct port of zsh's
8007 // pat_pure_m population.
8008 exec.set_array("mbegin".to_string(), begins);
8009 exec.set_array("mend".to_string(), ends);
8010 if let Some(m0) = caps.get(0) {
8011 exec.set_scalar("MATCH".to_string(), m0.as_str().to_string());
8012 exec.set_scalar("MBEGIN".to_string(), (m0.start() + 1).to_string());
8013 exec.set_scalar("MEND".to_string(), m0.end().to_string());
8014 }
8015 });
8016 crate::ported::subst::singsub(&repl_raw)
8017 } else {
8018 repl.clone()
8019 }
8020 };
8021 match op {
8022 0 => {
8023 if backref_mode {
8024 // `replacen` doesn't expose Captures —
8025 // reimplement: find first match, expand
8026 // replacement from its caps, splice.
8027 if let Some(caps) = rx.captures(&val) {
8028 let m = caps.get(0).unwrap();
8029 let r = expand_repl_with_caps(&caps);
8030 return format!("{}{}{}", &val[..m.start()], r, &val[m.end()..]);
8031 }
8032 val
8033 } else {
8034 rx.replacen(&val, 1, repl.as_str()).to_string()
8035 }
8036 }
8037 1 => {
8038 if backref_mode {
8039 // Iterate each match, build output piecewise.
8040 let mut out = String::with_capacity(val.len());
8041 let mut last = 0usize;
8042 for caps in rx.captures_iter(&val) {
8043 let m = caps.get(0).unwrap();
8044 out.push_str(&val[last..m.start()]);
8045 let r = expand_repl_with_caps(&caps);
8046 out.push_str(&r);
8047 last = m.end();
8048 }
8049 out.push_str(&val[last..]);
8050 out
8051 } else {
8052 rx.replace_all(&val, repl.as_str()).to_string()
8053 }
8054 }
8055 2 => {
8056 // Anchored prefix: only match at start.
8057 if let Some(caps) = rx.captures(&val) {
8058 let m = caps.get(0).unwrap();
8059 if m.start() == 0 {
8060 let r = if backref_mode {
8061 expand_repl_with_caps(&caps)
8062 } else {
8063 repl.clone()
8064 };
8065 return format!("{}{}", r, &val[m.end()..]);
8066 }
8067 }
8068 val
8069 }
8070 3 => {
8071 // Anchored suffix: last match whose end is val.len().
8072 let mut last_caps: Option<regex::Captures> = None;
8073 for caps in rx.captures_iter(&val) {
8074 let m = caps.get(0).unwrap();
8075 if m.end() == val.len() {
8076 last_caps = Some(caps);
8077 }
8078 }
8079 if let Some(caps) = last_caps {
8080 let m = caps.get(0).unwrap();
8081 let r = if backref_mode {
8082 expand_repl_with_caps(&caps)
8083 } else {
8084 repl.clone()
8085 };
8086 return format!("{}{}", &val[..m.start()], r);
8087 }
8088 val
8089 }
8090 _ => val,
8091 }
8092 } else {
8093 match op {
8094 0 => val.replacen(&pattern, &repl, 1),
8095 1 => val.replace(&pattern, &repl),
8096 2 => {
8097 if val.starts_with(&pattern) {
8098 format!("{}{}", repl, &val[pattern.len()..])
8099 } else {
8100 val
8101 }
8102 }
8103 3 => {
8104 if val.ends_with(&pattern) {
8105 format!("{}{}", &val[..val.len() - pattern.len()], repl)
8106 } else {
8107 val
8108 }
8109 }
8110 _ => val,
8111 }
8112 }
8113 };
8114 // Array case: per-element replacement (default), or
8115 // join-then-replace when in DQ context. zsh: `"${a/o/O}"`
8116 // for `a=(one two three)` joins to "one two three", then
8117 // does the FIRST replacement only -> "One two three".
8118 // Unquoted `${a/o/O}` per-element first -> "One twO three".
8119 let arr_val = with_executor(|exec| exec.array(&name));
8120 if let Some(arr) = arr_val {
8121 if dq_flag {
8122 let joined = arr.join(" ");
8123 return fusevm::Value::str(one(joined));
8124 }
8125 let mapped: Vec<fusevm::Value> = arr
8126 .into_iter()
8127 .map(|s| fusevm::Value::str(one(s)))
8128 .collect();
8129 return fusevm::Value::Array(mapped);
8130 }
8131 let val = with_executor(|exec| exec.get_variable(&name));
8132 fusevm::Value::str(one(val))
8133 });
8134
8135 vm.register_builtin(BUILTIN_REGISTER_COMPILED_FN, |vm, argc| {
8136 let args = pop_args(vm, argc);
8137 let mut iter = args.into_iter();
8138 let name = iter.next().unwrap_or_default();
8139 let body_b64 = iter.next().unwrap_or_default();
8140 let body_source = iter.next().unwrap_or_default();
8141 let line_base_str = iter.next().unwrap_or_default();
8142 let line_base: i64 = line_base_str.parse().unwrap_or(0);
8143 let bytes = base64_decode(&body_b64);
8144 let status = match bincode::deserialize::<fusevm::Chunk>(&bytes) {
8145 Ok(chunk) => with_executor(|exec| {
8146 let def_file = exec.scriptfilename.clone();
8147 if !body_source.is_empty() {
8148 exec.function_source.insert(name.clone(), body_source.clone());
8149 }
8150 exec.function_line_base.insert(name.clone(), line_base);
8151 exec.function_def_file.insert(name.clone(), def_file);
8152 // PFA-SMR aspect: every `name() {}` / `function name { }`
8153 // funnels through here at compile time. Emit one record
8154 // with the function name + raw body source.
8155 #[cfg(feature = "recorder")]
8156 if crate::recorder::is_enabled() {
8157 let ctx = exec.recorder_ctx();
8158 let body = if body_source.is_empty() {
8159 None
8160 } else {
8161 Some(body_source.as_str())
8162 };
8163 crate::recorder::emit_function(&name, body, ctx);
8164 }
8165 // Mirror into canonical shfunctab so scanfunctions /
8166 // ${(k)functions} / functions builtin see user defs.
8167 // C: exec.c:funcdef → shfunctab->addnode(ztrdup(name),shf).
8168 if let Ok(mut tab) =
8169 crate::ported::hashtable::shfunctab_lock().write()
8170 {
8171 let shf = crate::ported::hashtable::shfunc_with_body(
8172 &name,
8173 &body_source,
8174 );
8175 tab.add(shf);
8176 }
8177 exec.functions_compiled.insert(name, chunk);
8178 0
8179 }),
8180 Err(_) => 1,
8181 };
8182 Value::Status(status)
8183 });
8184
8185 // Wire the ShellHost so direct shell ops (Op::Glob, Op::TildeExpand,
8186 // Op::ExpandParam, Op::CmdSubst, Op::CallFunction, etc.) route through
8187 // ZshrsHost back into the executor.
8188 vm.set_shell_host(Box::new(ZshrsHost));
8189}
8190
8191impl ZshrsHost {
8192 /// True iff `c` can be a `(j:…:)` / `(s:…:)` delimiter — non-alphanumeric,
8193 /// non-underscore. Restricting to punctuation avoids `(jL)` consuming `L`
8194 /// as a delim instead of as the next flag.
8195 fn is_zsh_flag_delim(c: char) -> bool {
8196 !c.is_ascii_alphanumeric() && c != '_'
8197 }
8198}
8199
8200fn expand_dollar_refs(s: &str, exec: &crate::ported::exec::ShellExecutor) -> String {
8201 // Single-pass `$VAR` / `${VAR}` expansion for subscript bodies.
8202 // Mirrors the small subset of paramsubst needed when the BUILTIN_
8203 // PARAM_LENGTH handler resolves `${#arr[$i]}`.
8204 let bytes: Vec<char> = s.chars().collect();
8205 let mut out = String::with_capacity(s.len());
8206 let mut i = 0;
8207 while i < bytes.len() {
8208 if bytes[i] != '$' {
8209 out.push(bytes[i]);
8210 i += 1;
8211 continue;
8212 }
8213 if i + 1 >= bytes.len() {
8214 out.push('$');
8215 i += 1;
8216 continue;
8217 }
8218 let next = bytes[i + 1];
8219 if next == '{' {
8220 if let Some(close) = bytes[i + 2..].iter().position(|&c| c == '}') {
8221 let name: String = bytes[i + 2..i + 2 + close].iter().collect();
8222 out.push_str(&exec.get_variable(&name));
8223 i += 2 + close + 1;
8224 continue;
8225 }
8226 }
8227 if next.is_ascii_alphabetic() || next == '_' {
8228 let start = i + 1;
8229 let mut end = start;
8230 while end < bytes.len() && (bytes[end].is_ascii_alphanumeric() || bytes[end] == '_') {
8231 end += 1;
8232 }
8233 let name: String = bytes[start..end].iter().collect();
8234 out.push_str(&exec.get_variable(&name));
8235 i = end;
8236 continue;
8237 }
8238 out.push('$');
8239 i += 1;
8240 }
8241 out
8242}
8243
8244fn pop_args(vm: &mut fusevm::VM, argc: u8) -> Vec<String> {
8245 let mut popped: Vec<fusevm::Value> = Vec::with_capacity(argc as usize);
8246 for _ in 0..argc {
8247 popped.push(vm.pop());
8248 }
8249 popped.reverse();
8250 let mut args: Vec<String> = Vec::with_capacity(popped.len());
8251 for v in popped {
8252 match v {
8253 fusevm::Value::Array(items) => {
8254 for item in items {
8255 args.push(item.to_str());
8256 }
8257 }
8258 other => args.push(other.to_str()),
8259 }
8260 }
8261 // `expand_glob` set the glob-failed cell when a no-match glob in
8262 // this command's argv triggered the `nomatch` error. For BUILTIN
8263 // commands (zsh: errflag persists in the shell process), the
8264 // entire script aborts with status 1 — `echo /no_match_*` exits
8265 // before printing anything. External commands hit the same flag
8266 // in `host_exec_external` instead, which only fails the command
8267 // and lets the script continue (zsh's fork inherits-but-resets
8268 // errflag semantics). We only land here for builtins, so abort.
8269 let glob_failed = with_executor(|exec| {
8270 let f = exec.current_command_glob_failed.get();
8271 if f {
8272 exec.current_command_glob_failed.set(false);
8273 exec.set_last_status(1);
8274 }
8275 f
8276 });
8277 if glob_failed {
8278 std::process::exit(1);
8279 }
8280 // `$_` tracks the last argument of the PREVIOUSLY executed
8281 // command (zsh / bash convention). Promote the deferred value
8282 // into `$_` BEFORE this command runs (so `echo $_` reads the
8283 // prior command's last arg) then stash THIS command's last arg
8284 // for the next dispatch.
8285 let new_last = args.last().cloned();
8286 with_executor(|exec| {
8287 if let Some(prev) = exec.pending_underscore.take() {
8288 exec.set_scalar("_".to_string(), prev);
8289 }
8290 if let Some(last) = new_last {
8291 exec.pending_underscore = Some(last);
8292 }
8293 });
8294 args
8295}
8296
8297/// zsh dispatch order is alias → function → builtin → external. The
8298/// compiler emits direct CallBuiltin ops for known builtin names for
8299/// perf, which silently skips a user function that shadows the same
8300/// name (e.g. `echo() { ... }; echo hi` would run the C builtin
8301/// without this check). Returns Some(status) when the call is routed
8302/// to the user function; the builtin handler should fall through to
8303/// its native impl when None.
8304fn try_user_fn_override(name: &str, args: &[String]) -> Option<i32> {
8305 let has_fn = with_executor(|exec| {
8306 exec.functions_compiled.contains_key(name) || exec.function_exists(name)
8307 });
8308 if !has_fn {
8309 return None;
8310 }
8311 Some(with_executor(|exec| {
8312 exec.dispatch_function_call(name, args).unwrap_or(127)
8313 }))
8314}
8315
8316// IDs 281 (was BUILTIN_EXPAND_WORD_RUNTIME) and 282 (was
8317// BUILTIN_REGISTER_FUNCTION) were legacy JSON-AST bridges. ZshCompiler
8318// emits BUILTIN_EXPAND_TEXT (314) and BUILTIN_REGISTER_COMPILED_FN
8319// (305) instead. The IDs stay reserved in this gap so future builtins
8320// don't reuse them.
8321
8322/// Builtin ID for `${name}` reads — routes through `ShellExecutor::get_variable`
8323/// which knows about special params (`$?`, `$@`, `$#`, `$1..$9`), shell vars
8324/// (`self.variables`), arrays, and env. Replaces emission of `Op::GetVar` for
8325/// shell variable names so nested VMs (function calls) see the same storage.
8326pub const BUILTIN_GET_VAR: u16 = 283;
8327
8328/// Builtin ID for `name=value` assignments — pops [name, value] and stores
8329/// into `executor.variables`. Replaces `Op::SetVar` emission for the same
8330/// reason: the storage must be visible to both bytecode and tree-walker code,
8331/// across nested VM boundaries.
8332pub const BUILTIN_SET_VAR: u16 = 284;
8333
8334/// Builtin ID for pipeline execution. Pops N sub-chunk indices from the stack;
8335/// each index points into `vm.chunk.sub_chunks` (compiled stage bodies). Forks
8336/// N children, wires stdin/stdout between them via pipes, runs each stage's
8337/// bytecode on a fresh VM in its child, parent waits for all and pushes the
8338/// last stage's exit status. This is bytecode-native pipeline execution —
8339/// no tree-walker delegation.
8340pub const BUILTIN_RUN_PIPELINE: u16 = 285;
8341
8342/// Builtin ID for `Array → String` joining. Pops one value: if it's an Array,
8343/// joins its string-coerced elements with a single space; otherwise passes
8344/// through. Used after `Op::Glob` to convert the pattern's matched paths into
8345/// the single argv-token form the bytecode word model expects (no per-word
8346/// splitting yet — that's a future phase).
8347pub const BUILTIN_ARRAY_JOIN: u16 = 286;
8348
/// Builtin ID for `cmd &` background execution. IDs 287/288/289 are taken by
/// the array builtins (`BUILTIN_SET_ARRAY` / `BUILTIN_SET_ASSOC` /
/// `BUILTIN_ARRAY_INDEX`), so this lands at 290. Pops one sub-chunk index;
/// forks; the child detaches (`setsid`), runs the sub-chunk on a fresh VM, and
/// exits with last_status; the parent returns Status(0) immediately. Job-table
/// registration (so `jobs`/`fg`/`wait` can see the pid) is deferred to
/// Phase G6 — fire-and-forget for now.
8355pub const BUILTIN_RUN_BG: u16 = 290;
8356
8357/// Indexed-array assignment: `arr=(a b c)`. Compile_simple emits N element
8358/// pushes followed by name push, then `CallBuiltin(BUILTIN_SET_ARRAY, N+1)`.
8359/// The handler pops args (last popped = name in our pushing order) and stores
8360/// `Vec<String>` into `executor.arrays`. Tree-walker callers see the same
8361/// storage. Any prior scalar binding in `executor.variables` for `name` is
8362/// removed so `${name}` (scalar context) consistently reflects the array's
8363/// first element via `get_variable`.
8364pub const BUILTIN_SET_ARRAY: u16 = 287;
8365
8366/// Single-key set on an associative array: `foo[key]=val`. Stack (top-down):
8367/// [name, key, value]. Stores `value` into `executor.assoc_arrays[name][key]`,
8368/// creating the outer entry if missing. compile_simple detects `var[...]=...`
8369/// in assignments and emits this builtin.
8370pub const BUILTIN_SET_ASSOC: u16 = 288;
8371
8372/// `${arr[idx]}` — single-element array index. Pops two args:
8373/// stack: [name, idx_str]
8374/// Returns the indexed element as Value::str. Indexing semantics: zsh is
8375/// 1-based by default; bash is 0-based. We follow zsh.
8376/// Special idx values: `@` and `*` return the whole array as Value::Array
8377/// (which fuses correctly via the Op::Exec splice for argv splice).
8378pub const BUILTIN_ARRAY_INDEX: u16 = 289;
8379
8380/// `${#arr[@]}` and `${#arr}` (when arr is an array name) — array length.
8381/// Pops one arg: name. Returns Value::str of len.
8382pub const BUILTIN_ARRAY_LENGTH: u16 = 291;
8383
8384/// `${arr[@]}` — splice all elements as a Value::Array. Pops one arg: name.
8385/// The Array gets flattened by Op::Exec/ExecBg/CallFunction into argv.
8386pub const BUILTIN_ARRAY_ALL: u16 = 292;
8387
8388/// Flatten one level of Value::Array nesting. Pops N values; for each, if it's
8389/// a Value::Array, its elements are appended directly; otherwise the value is
8390/// appended as-is. Pushes a single Value::Array of the flattened result. Used
8391/// by the for-loop word-list compile path: when a word like `${arr[@]}`
8392/// produces a nested Array, this lets `for i in ${arr[@]}` iterate over the
8393/// inner elements rather than the outer single-element array.
8394pub const BUILTIN_ARRAY_FLATTEN: u16 = 293;
8395
8396/// `coproc [name] { body }` — bidirectional pipe to async child. Pops a name
8397/// (optional, "" for default) and a sub-chunk index. Creates two pipes, forks,
8398/// child redirects its fd 0/1 to the inner ends and runs the body, parent
8399/// stores [write_fd, read_fd] into the named array (default `COPROC`). Caller
8400/// closes the fds and `wait`s when done. Job-table integration deferred to
8401/// Phase G6 alongside the bg `&` work.
8402pub const BUILTIN_RUN_COPROC: u16 = 294;
8403
8404/// `arr+=(d e f)` — append N elements to an existing indexed array. Compile
8405/// emits N element pushes + name push, then `CallBuiltin(295, N+1)`. Handler
8406/// drains args (last popped = name), extends `executor.arrays[name]` (creates
8407/// the entry if missing). Mirrors zsh's `+=` semantics for indexed arrays.
8408pub const BUILTIN_APPEND_ARRAY: u16 = 295;
8409
8410/// `select var in words; do body; done` — interactive numbered-menu loop.
8411/// Compile emits N word pushes + var-name push + sub-chunk index push, then
8412/// `CallBuiltin(296, N+2)`. Handler prints `1) word1\n2) word2\n...` to
8413/// stderr, prints `$PROMPT3` (default `?# `) to stderr, reads a line from
8414/// stdin. On EOF returns 0. On a valid 1-based number, sets `var` to the
8415/// chosen word, runs the sub-chunk, then redisplays the menu and loops. On
8416/// invalid input redraws the menu without running the body. `break` from
8417/// inside the body exits the loop (handled by the body's own bytecode).
8418pub const BUILTIN_RUN_SELECT: u16 = 296;
8419
8420/// `m[k]+=value` — append onto an existing assoc-array value (string concat).
8421/// If the key doesn't exist, behaves like SET_ASSOC. Stack: [name, key, value].
8422pub const BUILTIN_APPEND_ASSOC: u16 = 298;
8423
8424/// `break` from inside a body that runs on a sub-VM (select, future loop-via-
8425/// builtin constructs). Sets `executor.loop_signal = Some(LoopSignal::Break)`.
8426/// Outer-loop builtins drain the flag after each body run and exit early.
8427pub const BUILTIN_SET_BREAK: u16 = 299;
8428
8429/// `continue` from inside a sub-VM body. Sets the signal to Continue. Outer
8430/// loop builtins drain + skip-to-next-iteration.
8431pub const BUILTIN_SET_CONTINUE: u16 = 300;
8432
8433/// Brace expansion: `{a,b,c}` → 3 values, `{1..5}` → 5 values, `{01..05}` →
8434/// zero-padded numerics, `{a..e}` → letter range. Pops one string, returns
8435/// Value::Array of expansions (empty array → original string preserved).
8436pub const BUILTIN_BRACE_EXPAND: u16 = 301;
8437
8438/// Glob qualifier filter: `*(qualifier)` filters glob results by predicate.
8439/// Pops [pattern, qualifier_string]. Returns Value::Array of matching paths.
8440pub const BUILTIN_GLOB_QUALIFIED: u16 = 302;
8441
8442/// Re-export the regex_match host method as a builtin so `[[ s =~ pat ]]`
8443/// works even when fusevm's Op::RegexMatch isn't routed (compat fallback).
8444pub const BUILTIN_REGEX_MATCH: u16 = 303;
8445
8446/// Word-split a string on IFS (default: whitespace). Pops one string,
8447/// returns Value::Array of fields. Used in array-literal context where
8448/// `arr=($(cmd))` should expand cmd's stdout into multiple elements.
8449pub const BUILTIN_WORD_SPLIT: u16 = 304;
8450
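/// Register a pre-compiled fusevm chunk as a function. Stack: [name,
/// base64-bincode-of-Chunk, body_source, line_base]. `body_source` is the raw
/// function text (may be empty); `line_base` is an integer, treated as 0 when
/// missing or unparsable. Pushes Status 0 on success, 1 when the chunk fails
/// to deserialize. Used by compile_zsh's compile_funcdef to register
/// functions parsed via parse_init+parse without going through the
/// ShellCommand JSON serialization path.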
8455pub const BUILTIN_REGISTER_COMPILED_FN: u16 = 305;
8456pub const BUILTIN_VAR_EXISTS: u16 = 306;
8457/// Phase 1 native param-modifier builtins. Each takes a fixed argv shape
8458/// and returns the modified value as Value::Str. Replaces the runtime
8459/// ShellWord round-trip via BUILTIN_EXPAND_WORD_RUNTIME for the common
8460/// shapes.
8461///
8462/// `${var:-default}` / `${var:=default}` / `${var:?error}` / `${var:+alt}`
8463/// — pop [name, op_byte, rhs]. op_byte: 0=`:-`, 1=`:=`, 2=`:?`, 3=`:+`.
8464pub const BUILTIN_PARAM_DEFAULT_FAMILY: u16 = 307;
8465/// `${var:offset[:length]}` — pop [name, offset, length] (length=-1 means
8466/// "rest of value"; negative offset counts from end).
8467pub const BUILTIN_PARAM_SUBSTRING: u16 = 308;
8468/// `${var#pat}` / `${var##pat}` / `${var%pat}` / `${var%%pat}` — pop
8469/// [name, pattern, op_byte]. op_byte: 0=`#`, 1=`##`, 2=`%`, 3=`%%`.
8470pub const BUILTIN_PARAM_STRIP: u16 = 309;
8471/// `${var/pat/repl}` / `${var//pat/repl}` / `${var/#pat/repl}` /
8472/// `${var/%pat/repl}` — pop [name, pattern, replacement, op_byte].
8473/// op_byte: 0=first, 1=all, 2=anchor-prefix, 3=anchor-suffix.
8474pub const BUILTIN_PARAM_REPLACE: u16 = 310;
8475/// `${#name}` — character length of a scalar value, or element count
8476/// of an indexed/assoc array. Pops \[name\], returns count as Value::Str.
8477pub const BUILTIN_PARAM_LENGTH: u16 = 311;
8478/// `$((expr))` arithmetic substitution. Pops \[expr_string\], evaluates
8479/// via the executor's MathEval (integer-aware), returns result as
8480/// Value::Str. Bypasses ArithCompiler's float-only Op::Div path so
8481/// `$((10/3))` returns "3" not "3.333...".
8482pub const BUILTIN_ARITH_EVAL: u16 = 312;
8483/// `$(cmd)` command substitution. Pops \[cmd_string\], runs through
8484/// `run_command_substitution` which compiles via parse_init+parse + ZshCompiler
8485/// and captures stdout via an in-process pipe. Returns trimmed output
8486/// as Value::Str. Avoids the sub-chunk word-emit quoting bug in the
8487/// raw Op::CmdSubst path.
8488pub const BUILTIN_CMD_SUBST_TEXT: u16 = 313;
8489/// Text-based word expansion. Pops \[preserved_text\]: the word with
8490/// quotes preserved (Dnull→`"`, Snull→`'`, Bnull→`\`), runs
8491/// `expand_string` (variable + cmd-sub + arith) then `xpandbraces`
8492/// then `expand_glob`. Returns Value::str (single match) or
8493/// Value::Array (multi-match brace/glob).
8494pub const BUILTIN_EXPAND_TEXT: u16 = 314;
8495
8496/// `[[ a -ef b ]]` — same-inode test. Stack: [a, b]. Pushes Bool true iff
8497/// both paths resolve to the same `(dev, inode)` pair (zsh + bash semantics).
8498pub const BUILTIN_SAME_FILE: u16 = 315;
8499
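/// `[[ a -nt b ]]` — file `a` newer than file `b` (mtime strict).
/// Stack: [path_a, path_b]. Pushes Bool. zsh-compatible "missing"
/// rules: if both exist, compare mtime; if only `a` exists → true;
/// otherwise false.
/// NOTE(review): ID 324 is also assigned to `BUILTIN_UNKNOWN_COND`;
/// presumably only one handler can own an ID — confirm and renumber.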
8504pub const BUILTIN_FILE_NEWER: u16 = 324;
8505
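/// `[[ a -ot b ]]` — mirror of `-nt`. If both exist, compare mtime;
/// if only `b` exists → true; otherwise false.
/// NOTE(review): ID 325 is also assigned to `BUILTIN_IS_TTY`;
/// presumably only one handler can own an ID — confirm and renumber.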
8508pub const BUILTIN_FILE_OLDER: u16 = 325;
8509
8510/// `[[ -k path ]]` — sticky bit (S_ISVTX) set on path.
8511pub const BUILTIN_HAS_STICKY: u16 = 326;
8512/// `[[ -u path ]]` — setuid bit (S_ISUID).
8513pub const BUILTIN_HAS_SETUID: u16 = 327;
8514/// `[[ -g path ]]` — setgid bit (S_ISGID).
8515pub const BUILTIN_HAS_SETGID: u16 = 328;
8516/// `[[ -O path ]]` — owned by effective UID.
8517pub const BUILTIN_OWNED_BY_USER: u16 = 329;
8518/// `[[ -G path ]]` — owned by effective GID.
8519pub const BUILTIN_OWNED_BY_GROUP: u16 = 330;
8520/// `[[ -N path ]]` — file modified since last accessed (atime <= mtime).
8521pub const BUILTIN_FILE_MODIFIED_SINCE_ACCESS: u16 = 341;
8522
8523/// `name+=val` (no parens) — runtime-dispatched append.
8524/// If name is an indexed array → push val as element.
8525/// If name is an assoc array → error (zsh requires `(k v)` form).
8526/// Else → scalar concat (existing SET_VAR behavior).
8527pub const BUILTIN_APPEND_SCALAR_OR_PUSH: u16 = 331;
8528
8529/// `[[ -c path ]]` — character device.
8530pub const BUILTIN_IS_CHARDEV: u16 = 332;
8531/// `[[ -b path ]]` — block device.
8532pub const BUILTIN_IS_BLOCKDEV: u16 = 333;
8533/// `[[ -p path ]]` — FIFO / named pipe.
8534pub const BUILTIN_IS_FIFO: u16 = 334;
8535/// `[[ -S path ]]` — socket.
8536pub const BUILTIN_IS_SOCKET: u16 = 335;
8537pub const BUILTIN_ERREXIT_CHECK: u16 = 336;
8538pub const BUILTIN_PARAM_SUBSTRING_EXPR: u16 = 337;
8539pub const BUILTIN_XTRACE_LINE: u16 = 338;
8540pub const BUILTIN_ARRAY_JOIN_STAR: u16 = 339;
8541pub const BUILTIN_SET_RAW_OPT: u16 = 340;
8542
8543/// `time { compound; ... }` — wall-clock-time the sub-chunk and print
8544/// elapsed seconds. Stack: [sub_chunk_idx as Int]. Runs the sub-chunk
8545/// on the current VM (so positional/local state is shared) and prints
8546/// the timing summary to stderr in zsh's format. Pushes Status.
8547pub const BUILTIN_TIME_SUBLIST: u16 = 316;
8548
8549/// `{name}>file` / `{name}<file` / `{name}>>file` — named-fd allocation.
8550/// Stack: [path, varid, op_byte]. Opens `path` per `op_byte`, gets the
8551/// new fd (≥10 in zsh; we use libc::open with O_CLOEXEC bit cleared so
8552/// the inherited fd survives Command::new spawns), stores the fd number
8553/// as a string in `$varid`. Pushes Status (0 success, 1 error).
8554pub const BUILTIN_OPEN_NAMED_FD: u16 = 317;
8555
8556/// Word-segment concat that does cartesian-product distribution over
8557/// arrays. Stack: [lhs, rhs]. Used for RC_EXPAND_PARAM `${arr}` and
8558/// explicit-distribute forms (`${^arr}`, `${(@)…}`).
8559///
8560/// - both scalar: `Value::str(a + b)` (fast path, identical to Op::Concat)
8561/// - lhs Array, rhs scalar: `Value::Array([a + rhs for a in lhs])`
8562/// - lhs scalar, rhs Array: `Value::Array([lhs + b for b in rhs])`
8563/// - both Array: cartesian product `[a + b for a in lhs for b in rhs]`
8564pub const BUILTIN_CONCAT_DISTRIBUTE: u16 = 318;
8565
8566/// Forced-distribute concat — like `BUILTIN_CONCAT_DISTRIBUTE` but
8567/// always distributes cartesian regardless of the `rcexpandparam`
8568/// option. Emitted by the segments fast-path when an
8569/// `is_distribute_expansion` segment is present (`${^arr}`,
8570/// `${(@)arr}`, `${(s.…)arr}` etc.) per zsh: the source-level
8571/// distribution flag overrides the option default.
8572/// Direct port of Src/subst.c:1875 `case Hat: nojoin = 1` and the
8573/// `rcexpandparam` test bypass for the explicit-distribute flags.
8574pub const BUILTIN_CONCAT_DISTRIBUTE_FORCED: u16 = 522;
8575
8576/// Capture current `last_status` into the `TRY_BLOCK_ERROR` variable.
8577/// Emitted between the try block and the always block of `{ … } always
8578/// { … }` so the finally arm can read $TRY_BLOCK_ERROR.
8579pub const BUILTIN_SET_TRY_BLOCK_ERROR: u16 = 320;
8580pub const BUILTIN_RESTORE_TRY_BLOCK_STATUS: u16 = 432;
8581pub const BUILTIN_BEGIN_INLINE_ENV: u16 = 433;
8582pub const BUILTIN_END_INLINE_ENV: u16 = 434;
8583
8584/// `[[ -o option ]]` — shell-option-set test. Stack: \[option_name\].
8585/// Normalizes the name (strip underscores, lowercase) and reads
8586/// `exec.options`. Pushes Bool.
8587pub const BUILTIN_OPTION_SET: u16 = 321;
8588
8589/// `${var:#pattern}` — array filter: remove elements matching `pattern`.
8590/// Stack: [name, pattern]. For scalar `var`, returns empty if it matches
8591/// the pattern, else the value. For array `var`, returns Array of
8592/// non-matching elements.
8593pub const BUILTIN_PARAM_FILTER: u16 = 322;
8594
8595/// `a[i]=(elements)` / `a[i,j]=(elements)` / `a[i]=()` —
8596/// subscripted-array assign with array-literal RHS. Stack:
8597/// [...elements, name, key]. Empty elements + single-int key `a[i]=()`
8598/// removes that element. Comma-key `a[i,j]=(...)` splices.
8599pub const BUILTIN_SET_SUBSCRIPT_RANGE: u16 = 323;
8600
/// `[[ -X file ]]` for unknown unary test op `-X`. Stack: \[op_name\].
/// Emits zsh's `unknown condition: -X` diagnostic to stderr and
/// pushes Bool(false). Without this, unknown conditions silently
/// returned false, which matched neither zsh's error format nor the
/// expected status code (zsh returns 2 for a parse error).
/// NOTE(review): ID 324 is also assigned to `BUILTIN_FILE_NEWER`;
/// presumably only one handler can own an ID — confirm and renumber.
8606pub const BUILTIN_UNKNOWN_COND: u16 = 324;
8607
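/// `[[ -t fd ]]` — fd-is-a-tty check. Stack: \[fd_string\].
/// Routes through libc::isatty. Pushes Bool.
/// NOTE(review): ID 325 is also assigned to `BUILTIN_FILE_OLDER`;
/// presumably only one handler can own an ID — confirm and renumber.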
8610pub const BUILTIN_IS_TTY: u16 = 325;
8611
/// Update `$LINENO` to track the source line of the next statement.
/// Stack: \[n\] (the line number from `ZshPipe.lineno`). Direct port
/// of zsh's `lineno` global tracking (Src/input.c:330) — the
/// compiler emits one of these per top-level pipe so `$LINENO`
/// reflects the source position at runtime. ID 342 was picked because
/// the previous `326` collided with `BUILTIN_HAS_STICKY`. Other
/// duplicate IDs remain in this file — including 324
/// (`BUILTIN_FILE_NEWER` / `BUILTIN_UNKNOWN_COND`) and 325
/// (`BUILTIN_FILE_OLDER` / `BUILTIN_IS_TTY`) — but fixing those is
/// out of scope for this port.
8620pub const BUILTIN_SET_LINENO: u16 = 342;
8621
8622/// Pop a scalar from the VM stack, run expand_glob on it, push the
8623/// result as Value::Array. Used by the segment-concat compile path
8624/// when var refs concatenate with glob meta literals (`$D/*`,
8625/// `${prefix}*`, etc.) — those skip the bridge's pathname-expansion
8626/// pass and would otherwise leak the glob meta to argv as a literal.
8627pub const BUILTIN_GLOB_EXPAND: u16 = 343;
8628
8629/// Push a `CmdState` token onto the command-context stack. Direct
8630/// port of zsh's `cmdpush(int cmdtok)` (Src/prompt.c:1623). The
8631/// stack is consulted by `%_` in PS4/prompt expansion to produce
8632/// the cumulative control-flow-context labels (`if`, `then`,
8633/// `cmdand`, `cmdor`, `cmdsubst`, …) that `zsh -x` xtrace shows
8634/// in the trace prefix. Compile_zsh emits push/pop pairs around
8635/// each compound command (if/while/[[…]]/((…))/$(…) etc.).
8636/// Token is a `CmdState as u8`.
8637pub const BUILTIN_CMD_PUSH: u16 = 344;
8638
/// Pop the top of the command-context stack. Direct port of zsh's
/// `cmdpop(void)` (Src/prompt.c:1631).
pub const BUILTIN_CMD_POP: u16 = 345;

/// Emit an xtrace line built from the top `argc` values on the VM
/// stack, peeked WITHOUT consuming. Used to trace simple commands
/// AFTER expansion, so `echo for $i` shows as `echo for a` / `echo
/// for b`. Direct port of Src/exec.c:2055-2066.
pub const BUILTIN_XTRACE_ARGS: u16 = 346;

/// Trace one assignment: emits `name=<quoted-value> ` (no newline)
/// to xtrerr if XTRACE is on. Coalesces with subsequent
/// XTRACE_ASSIGN / XTRACE_ARGS calls onto the SAME line via the
/// `XTRACE_DONE_PS4` flag so `a=1 b=2 echo $a $b` produces
/// `<PS4>a=1 b=2 echo 1 2\n`, matching C zsh's `execcmd_exec` body
/// (Src/exec.c:2517-2582):
///
/// ```c
/// xtr = isset(XTRACE);
/// if (xtr) { printprompt4(); doneps4 = 1; }
/// while (assign) {
///     if (xtr) fprintf(xtrerr, "%s=", name);
///     ... eval value ...
///     if (xtr) { quotedzputs(val, xtrerr); fputc(' ', xtrerr); }
/// }
/// ```
///
/// Stack contract on entry: `[..., name, value]`. Both are peeked, NOT
/// consumed (the matching SET_VAR call pops them afterwards). argc = 2.
pub const BUILTIN_XTRACE_ASSIGN: u16 = 525;

/// Emit a trailing `\n` + flush iff XTRACE is on AND PS4 was
/// emitted by an earlier XTRACE_ASSIGN this line. Used at the end
/// of compile_simple's assignment-only path so the trace line gets
/// terminated. Mirrors C's exec.c:3397-3399 (the assign-only return
/// path through execcmd_exec which does `fputc('\n', xtrerr);
/// fflush(xtrerr)`).
///
/// Stack: untouched. argc = 0.
pub const BUILTIN_XTRACE_NEWLINE: u16 = 526;

/// Bridge into subst_port::substitute_brace_array for nested forms
/// that need to PRESERVE array shape across the expand_string
/// boundary. Stack: `[content_string]`. Returns Value::Array of the
/// per-element words. Used by the compile path for
/// `${(@)<nested>...##pat}` shapes — the standard substitute_brace
/// returns String, which collapses array→scalar; this builtin
/// preserves the multi-word output via paramsubst's third return
/// (`nodes` vec, the C source's `aval` thread).
pub const BUILTIN_BRIDGE_BRACE_ARRAY: u16 = 347;

/// Word-segment concat with FIRST/LAST sticking. Stack: `[lhs, rhs]`.
/// Used for default unquoted splice forms (`${arr[@]}`, `$@`, `$*`)
/// where the prefix sticks to the first element only and the suffix
/// to the last element only.
///
/// Distribution table:
/// - both scalar: `Value::str(a + b)` (fast path)
/// - lhs scalar, rhs Array(b₀..bₙ): `Value::Array([lhs+b₀, b₁, …, bₙ])`
/// - lhs Array(a₀..aₙ), rhs scalar: `Value::Array([a₀, …, aₙ₋₁, aₙ+rhs])`
/// - both Array: `Value::Array([a₀, …, aₙ₋₁, aₙ+b₀, b₁, …, bₙ])`
///   (last of lhs merges with first of rhs; the rest stay separate)
///
/// This is the default zsh semantics for `print -l X${arr[@]}Y` →
/// "Xa", "b", "cY" — three distinct args, surrounding text only on ends.
pub const BUILTIN_CONCAT_SPLICE: u16 = 319;

/// `${(flags)name}` — zsh parameter expansion flags. Stack: `[name, flags]`.
/// Flags are applied left-to-right. Supported subset (high-value, used by zpwr):
///
/// - `L` — lowercase the value (scalar; or each element if array)
/// - `U` — uppercase
/// - `j:sep:` — join array with `sep` (delim is the char after `j`)
/// - `s:sep:` — split scalar on `sep` (returns Value::Array)
/// - `f` — split on newlines (shorthand for `s.\n.`)
/// - `o` — sort array ascending
/// - `O` — sort array descending
/// - `P` — indirect: read name's value as another variable name and
///   return that variable's value
/// - `@` — keep as array (returns Value::Array — useful before `j` etc.)
/// - `k` — keys of assoc array
/// - `v` — values of assoc array
/// - `#` — word count (array length as scalar)
///
/// Flags can stack: `(jL)` joins then lowercases; `(s.,.U)` splits on `,`
/// then uppercases each element. The long-tail flags (`q`, `qq`, `qqq` for
/// quoting, `A` for assoc, `%` for prompt expansion, `e`/`g` for re-eval,
/// `n`/`p` for numeric, `t` for type, etc.) are deferred — they hit the
/// runtime fallback via the catch-all expansion path.
pub const BUILTIN_PARAM_FLAG: u16 = 297;
8724
/// `ShellHost` implementation that delegates to the current `ShellExecutor`
/// via the `with_executor` thread-local, tying fusevm bytecode ops to the
/// shell executor.
///
/// The type is a unit struct and carries no state itself, so a fresh value
/// is constructed for each VM run. The VM dispatches host method calls
/// during `vm.run()`, and `with_executor` resolves to the executor pointer
/// set by `ExecutorContext::enter`.
///
/// zshrs-original — no C counterpart. C zsh has no bytecode VM to host;
/// everything runs through `execlist()`/`execpline()` directly
/// (Src/exec.c lines 1349/1668).
pub struct ZshrsHost;
8737
8738impl fusevm::ShellHost for ZshrsHost {
8739 fn glob(&mut self, pattern: &str, _recursive: bool) -> Vec<String> {
8740 with_executor(|exec| exec.expand_glob(pattern))
8741 }
8742
8743 fn tilde_expand(&mut self, s: &str) -> String {
8744 with_executor(|exec| s.to_string())
8745 }
8746
8747 fn brace_expand(&mut self, s: &str) -> Vec<String> {
8748 // Direct call to the canonical brace expander
8749 // (Src/glob.c::xpandbraces port at glob.rs:1678). Was
8750 // routing through singsub which uses PREFORK_SINGLE — that
8751 // flag explicitly suppresses brace expansion in subst.c:166,
8752 // so `print X{1,2,3}Y` returned the literal string.
8753 //
8754 // brace_ccl: respect the BRACE_CCL option which the bracket-
8755 // class form `{a-z}` requires. Pull from executor options.
8756 let brace_ccl = with_executor(|exec|
8757 crate::ported::options::opt_state_get("braceccl").unwrap_or(false));
8758 crate::ported::glob::xpandbraces(s, brace_ccl)
8759 }
8760
8761 fn str_match(&mut self, s: &str, pattern: &str) -> bool {
8762 // Shell glob match — `*`, `?`, `[...]`, alternation. Used by `[[ x = pat ]]`,
8763 // `case` arms, and any other point that compares against a glob pattern.
8764 crate::exec::glob_match_static(s, pattern)
8765 }
8766
8767 fn expand_param(&mut self, name: &str, _modifier: u8, _args: &[fusevm::Value]) -> fusevm::Value {
8768 // Sole funnel: route through `getsparam` matching C zsh's
8769 // `getsparam(name)` → `getvalue` → `getstrvalue` →
8770 // `Param.gsu->getfn` dispatch (Src/params.c:3076 / 2335).
8771 //
8772 // The lookup chain (GSU dispatch + variables + env + array-
8773 // join) lives in `params::getsparam`; subst.rs and this
8774 // bridge both call into it so the logic is in exactly one
8775 // place — mirroring C's "every read goes through getsparam"
8776 // architecture. fuseVM bytecode triggers this bridge when
8777 // the VM hits a PARAM opcode, equivalent to C's wordcode VM
8778 // resolving a parameter read during `exec.c` execution.
8779 //
8780 // Modifier handling: the `_modifier` / `_args` parameters
8781 // are populated by the bytecode compiler but applied by
8782 // separate VM opcodes (LENGTH/STRIP/SUBST/etc.) downstream
8783 // of this fetch — matching C's split between getsparam
8784 // (value fetch) and paramsubst's modifier-walk loop. This
8785 // bridge is the value-fetch step only.
8786 let val_str = crate::ported::params::getsparam(name)
8787 .unwrap_or_default();
8788 fusevm::Value::str(val_str)
8789 }
8790
8791 fn regex_match(&mut self, s: &str, regex: &str) -> bool {
8792 // Untokenize the pattern + subject before compiling. zsh's
8793 // lexer emits Snull/DQ markers around quoted regions; if a
8794 // single-quoted regex like `'([a-z]+)([0-9]+)'` reaches us
8795 // with the Snull bytes still present, regex::Regex::new
8796 // returns Err (the markers aren't valid pattern syntax).
8797 // Direct port of zsh's bin_test path which calls untokenize()
8798 // on both operands before handing to the regex compiler
8799 // (Src/cond.c:cond_match).
8800 let regex = crate::lex::untokenize(regex);
8801 let s = crate::lex::untokenize(s);
8802 let s = s.as_str();
8803 let regex = regex.as_str();
8804 // Compile (cached) and run captures so we can populate the
8805 // zsh-side magic vars: `$MATCH` (full match), `$match[N]`
8806 // (capture groups), and `$mbegin`/`$mend` (1-based offsets).
8807 let mut cache = REGEX_CACHE.lock();
8808 let re = if let Some(re) = cache.get(regex) {
8809 re.clone()
8810 } else {
8811 match regex::Regex::new(regex) {
8812 Ok(re) => {
8813 cache.insert(regex.to_string(), re.clone());
8814 re
8815 }
8816 Err(_) => return false,
8817 }
8818 };
8819 drop(cache);
8820 match re.captures(s) {
8821 Some(caps) => {
8822 let full = caps
8823 .get(0)
8824 .map(|m| m.as_str().to_string())
8825 .unwrap_or_default();
8826 let full_begin = caps
8827 .get(0)
8828 .map(|m| (s[..m.start()].chars().count() + 1).to_string())
8829 .unwrap_or_else(|| "0".to_string());
8830 let full_end = caps
8831 .get(0)
8832 .map(|m| s[..m.end()].chars().count().to_string())
8833 .unwrap_or_else(|| "0".to_string());
8834 let mut group_strs: Vec<String> = Vec::new();
8835 let mut begins: Vec<String> = Vec::new();
8836 let mut ends: Vec<String> = Vec::new();
8837 for i in 1..caps.len() {
8838 if let Some(m) = caps.get(i) {
8839 group_strs.push(m.as_str().to_string());
8840 begins.push((s[..m.start()].chars().count() + 1).to_string());
8841 ends.push(s[..m.end()].chars().count().to_string());
8842 } else {
8843 group_strs.push(String::new());
8844 begins.push("0".to_string());
8845 ends.push("0".to_string());
8846 }
8847 }
8848 with_executor(|exec| {
8849 exec.set_scalar("MATCH".to_string(), full);
8850 exec.set_scalar("MBEGIN".to_string(), full_begin);
8851 exec.set_scalar("MEND".to_string(), full_end);
8852 exec.set_array("match".to_string(), group_strs);
8853 exec.set_array("mbegin".to_string(), begins);
8854 exec.set_array("mend".to_string(), ends);
8855 });
8856 true
8857 }
8858 None => false,
8859 }
8860 }
8861
8862 fn process_sub_in(&mut self, sub: &fusevm::Chunk) -> String {
8863 // Run the sub-chunk synchronously (in the current executor context),
8864 // capture stdout into a temp file, return the path. Synchronous is
8865 // simpler and avoids the thread-local-executor limitation that
8866 // spawned threads can't see. Common consumers (`diff`, `cat`,
8867 // `comm`) read the file once anyway.
8868 let fifo_path = format!(
8869 "/tmp/zshrs_psub_{}_{}",
8870 std::process::id(),
8871 with_executor(|e| {
8872 let n = e.process_sub_counter;
8873 e.process_sub_counter += 1;
8874 n
8875 })
8876 );
8877 let _ = std::fs::remove_file(&fifo_path);
8878 let f = match std::fs::File::create(&fifo_path) {
8879 Ok(f) => f,
8880 Err(_) => return fifo_path,
8881 };
8882 let saved = unsafe { libc::dup(libc::STDOUT_FILENO) };
8883 unsafe {
8884 libc::dup2(f.as_raw_fd(), libc::STDOUT_FILENO);
8885 }
8886 crate::fusevm_disasm::maybe_print_stdout("process_subst_in", sub);
8887 let mut vm = fusevm::VM::new(sub.clone());
8888 register_builtins(&mut vm);
8889 vm.set_shell_host(Box::new(ZshrsHost));
8890 let _ = vm.run();
8891 let _ = std::io::stdout().flush();
8892 unsafe {
8893 libc::dup2(saved, libc::STDOUT_FILENO);
8894 libc::close(saved);
8895 }
8896 fifo_path
8897 }
8898
8899 fn process_sub_out(&mut self, sub: &fusevm::Chunk) -> String {
8900 // `>(cmd)` — consumer reads stdin from a FIFO that the parent
8901 // writes to. Create a real named pipe, fork a child that
8902 // dup2s the read end onto stdin and runs the sub-chunk; return
8903 // the FIFO path to the parent so it writes there.
8904 let fifo_path = format!(
8905 "/tmp/zshrs_psub_out_{}_{}",
8906 std::process::id(),
8907 with_executor(|e| {
8908 let n = e.process_sub_counter;
8909 e.process_sub_counter += 1;
8910 n
8911 })
8912 );
8913 let _ = std::fs::remove_file(&fifo_path);
8914 let cpath = match CString::new(fifo_path.clone()) {
8915 Ok(c) => c,
8916 Err(_) => return fifo_path,
8917 };
8918 if unsafe { libc::mkfifo(cpath.as_ptr(), 0o600) } != 0 {
8919 // Fall back to plain file if mkfifo fails.
8920 let _ = std::fs::write(&fifo_path, "");
8921 return fifo_path;
8922 }
8923 let sub = sub.clone();
8924 let fifo_for_child = fifo_path.clone();
8925 match unsafe { libc::fork() } {
8926 -1 => {
8927 let _ = std::fs::remove_file(&fifo_path);
8928 }
8929 0 => {
8930 // Child: open FIFO for read, dup onto stdin, run sub-chunk, exit.
8931 if let Ok(f) = std::fs::OpenOptions::new().read(true).open(&fifo_for_child) {
8932 let fd = f.as_raw_fd();
8933 unsafe {
8934 libc::dup2(fd, libc::STDIN_FILENO);
8935 }
8936 }
8937 crate::fusevm_disasm::maybe_print_stdout("process_subst_out:child", &sub);
8938 let mut vm = fusevm::VM::new(sub);
8939 register_builtins(&mut vm);
8940 vm.set_shell_host(Box::new(ZshrsHost));
8941 let _ = vm.run();
8942 unsafe { libc::_exit(0) };
8943 }
8944 _ => {
8945 // Parent — return path; child handles cleanup of the FIFO
8946 // once stdin EOFs. (The path may leak if the parent never
8947 // writes; acceptable for common `>(cmd)` idioms.)
8948 }
8949 }
8950 fifo_path
8951 }
8952
8953 fn subshell_begin(&mut self) {
8954 with_executor(|exec| {
8955 // libc::umask returns the previous mask AND sets the new
8956 // one; call with current value to read without changing.
8957 let cur_umask = unsafe {
8958 let m = libc::umask(0o022);
8959 libc::umask(m);
8960 m as u32
8961 };
8962 // Snapshot paramtab + hashed-storage too (step 1 of the
8963 // store unification mirrors writes there; restoring only
8964 // the HashMaps leaks subshell-scoped writes to the parent
8965 // via paramtab readers like `paramsubst → vars_get`).
8966 let paramtab_snap = crate::ported::params::paramtab().read().ok()
8967 .map(|t| t.clone())
8968 .unwrap_or_default();
8969 let paramtab_hashed_snap = crate::ported::params::paramtab_hashed_storage()
8970 .lock().ok()
8971 .map(|m| m.clone())
8972 .unwrap_or_default();
8973 exec.subshell_snapshots.push(SubshellSnapshot {
8974 paramtab: paramtab_snap,
8975 paramtab_hashed_storage: paramtab_hashed_snap,
8976 positional_params: exec.pparams(),
8977 env_vars: std::env::vars().collect(),
8978 // Save the LOGICAL pwd ($PWD env), not `current_dir()`'s
8979 // symlink-resolved path. zsh's subshell isolation per
8980 // Src/exec.c at the `entersubsh` path treats `pwd` (the
8981 // shell-tracked logical PWD) as the carrier — see
8982 // `Src/builtin.c:1239-1242` where cd writes the logical
8983 // dest into `pwd`. Falling back to current_dir() only
8984 // when PWD is unset matches `setupvals` at
8985 // `Src/init.c:1100+`.
8986 cwd: std::env::var("PWD").ok()
8987 .map(std::path::PathBuf::from)
8988 .or_else(|| std::env::current_dir().ok()),
8989 umask: cur_umask,
8990 traps: exec.traps.clone(),
8991 });
8992 // Subshell starts with EXIT trap cleared so the parent's
8993 // EXIT handler doesn't fire when the subshell ends. zsh:
8994 // each subshell has its own trap context. Other signals
8995 // are inherited (well, parent's are still in place — but
8996 // a trap set INSIDE the subshell shouldn't leak out).
8997 exec.traps.remove("EXIT");
8998 let level = exec
8999 .scalar("ZSH_SUBSHELL")
9000 .and_then(|s| s.parse::<i32>().ok())
9001 .unwrap_or(0);
9002 exec.set_scalar("ZSH_SUBSHELL".to_string(), (level + 1).to_string());
9003 });
9004 }
9005
9006 fn subshell_end(&mut self) {
9007 // Fire subshell's EXIT trap BEFORE restoring parent state so
9008 // the trap body sees the subshell's vars and exit status. zsh
9009 // forks for `(...)` so the trap runs in the child process,
9010 // before exit. We mirror by running it here, just before the
9011 // pop+restore. REMOVE the trap before firing so the inner
9012 // execute_script doesn't fire it again at its own end.
9013 let exit_trap_body = with_executor(|exec| exec.traps.remove("EXIT"));
9014 if let Some(body) = exit_trap_body {
9015 // Execute the trap body. Errors during trap execution
9016 // don't bubble — zsh ignores trap-body errors.
9017 with_executor(|exec| {
9018 let _ = exec.execute_script(&body);
9019 });
9020 }
9021 with_executor(|exec| {
9022 if let Some(snap) = exec.subshell_snapshots.pop() {
9023 // Restore paramtab + hashed storage so subshell-scoped
9024 // writes via setsparam/setaparam/sethparam don't leak
9025 // to the parent via paramtab readers.
9026 if let Some(tab) = crate::ported::params::paramtab().write().ok().as_deref_mut() {
9027 *tab = snap.paramtab;
9028 }
9029 if let Some(m) = crate::ported::params::paramtab_hashed_storage()
9030 .lock().ok().as_deref_mut() {
9031 *m = snap.paramtab_hashed_storage;
9032 }
9033 exec.set_pparams(snap.positional_params);
9034 // Restore the OS env to its pre-subshell state.
9035 // Removes any `export` writes the subshell made, and
9036 // restores any vars the subshell unset. Without this
9037 // `(export y=sub)` would leak `y` to the parent shell.
9038 let current: HashMap<String, String> = std::env::vars().collect();
9039 for k in current.keys() {
9040 if !snap.env_vars.contains_key(k) {
9041 std::env::remove_var(k);
9042 }
9043 }
9044 for (k, v) in &snap.env_vars {
9045 if current.get(k) != Some(v) {
9046 std::env::set_var(k, v);
9047 }
9048 }
9049 if let Some(cwd) = snap.cwd {
9050 let _ = std::env::set_current_dir(&cwd);
9051 // Resync $PWD env so a parent `pwd` doesn't read
9052 // the cwd the subshell `cd`'d into.
9053 std::env::set_var("PWD", &cwd);
9054 }
9055 // Restore umask. zsh's `(umask 077)` doesn't leak to
9056 // parent because the subshell forks; we run in-process
9057 // so we manually reset.
9058 unsafe {
9059 libc::umask(snap.umask as libc::mode_t);
9060 }
9061 // Restore parent's traps (the subshell's own traps die
9062 // with it). zsh: `(trap "X" USR1)` doesn't leak the
9063 // USR1 trap out of the subshell.
9064 exec.traps = snap.traps;
9065 }
9066 });
9067 }
9068
9069 fn redirect(&mut self, fd: u8, op: u8, target: &str) {
9070 // Apply a redirection at the OS level for the next command/builtin.
9071 // The host tracks saved fds in a per-executor stack so a future
9072 // `with_redirects_end` can restore. For now, this is a thin wrapper
9073 // that performs the dup2; pairing with explicit save/restore is
9074 // delivered by `with_redirects_begin/end`.
9075 with_executor(|exec| exec.host_apply_redirect(fd, op, target));
9076 }
9077
9078 fn with_redirects_begin(&mut self, count: u8) {
9079 with_executor(|exec| exec.host_redirect_scope_begin(count));
9080 }
9081
9082 fn with_redirects_end(&mut self) {
9083 with_executor(|exec| exec.host_redirect_scope_end());
9084 }
9085
9086 fn heredoc(&mut self, content: &str) {
9087 // C `Src/exec.c:4641` — `parsestr(&buf)` runs parameter +
9088 // command substitution on the heredoc body. The lexer's
9089 // quoted-delimiter detection (`<<'EOF'`) routes through the
9090 // `Op::HereDoc` path in `compile_zsh.rs` which short-circuits
9091 // before reaching here; unquoted forms route through the
9092 // BUILTIN_EXPAND_TEXT mode-4 emit path that calls singsub.
9093 // This handler covers the verbatim/quoted case.
9094 with_executor(|exec| exec.host_set_pending_stdin(content.to_string()));
9095 }
9096
9097 fn herestring(&mut self, content: &str) {
9098 // Shell semantics: herestring appends a newline. `<<<` body
9099 // substitution (`Src/exec.c:4655 getherestr` calls
9100 // `quotesubst` + `untokenize`) lands here verbatim; the
9101 // upstream compiler routes through `Op::HereString` after
9102 // BUILTIN_EXPAND_TEXT for the substitution pass, so callers
9103 // of `host.herestring` see the already-expanded form.
9104 let mut s = content.to_string();
9105 s.push('\n');
9106 with_executor(|exec| exec.host_set_pending_stdin(s));
9107 }
9108
9109 fn exec(&mut self, args: Vec<String>) -> i32 {
9110 // Track `$_` as the last argument of the last command (zsh /
9111 // bash convention). Empty arglists leave it untouched.
9112 if let Some(last) = args.last() {
9113 with_executor(|exec| {
9114 exec.set_scalar("_".to_string(), last.clone());
9115 });
9116 }
9117 // Route external command spawning through `executor.execute_external`
9118 // so intercepts (AOP before/after/around), command_hash lookups,
9119 // pre/postexec hooks, and zsh-specific fork-then-exec all apply.
9120 // Without this override, fusevm's default `host.exec` calls
9121 // `Command::new` directly, bypassing zshrs's dispatch logic.
9122 with_executor(|exec| exec.host_exec_external(&args))
9123 }
9124
9125 fn cmd_subst(&mut self, sub: &fusevm::Chunk) -> String {
9126 // Run the sub-chunk on a nested VM with the same host wired up,
9127 // capturing stdout. The current executor remains active via the
9128 // thread-local — the nested VM uses CallBuiltin to dispatch shell
9129 // ops back through `with_executor`.
9130 let (read_end, write_end) = match os_pipe::pipe() {
9131 Ok(p) => p,
9132 Err(_) => return String::new(),
9133 };
9134 let saved_stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
9135 if saved_stdout < 0 {
9136 return String::new();
9137 }
9138 let write_fd = std::os::unix::io::AsRawFd::as_raw_fd(&write_end);
9139 unsafe {
9140 libc::dup2(write_fd, libc::STDOUT_FILENO);
9141 }
9142 drop(write_end);
9143
9144 crate::fusevm_disasm::maybe_print_stdout("host.cmd_subst", sub);
9145 let mut vm = fusevm::VM::new(sub.clone());
9146 register_builtins(&mut vm);
9147 vm.set_shell_host(Box::new(ZshrsHost));
9148 let _ = vm.run();
9149 let cmd_status = vm.last_status;
9150
9151 unsafe {
9152 libc::dup2(saved_stdout, libc::STDOUT_FILENO);
9153 libc::close(saved_stdout);
9154 }
9155
9156 // Inner cmd's status not propagated for the same reason as
9157 // run_command_substitution — see GAPS.md.
9158 let _ = cmd_status;
9159
9160 let mut buf = String::new();
9161 let mut reader = read_end;
9162 let _ = reader.read_to_string(&mut buf);
9163 // Strip trailing newlines (POSIX command substitution semantics)
9164 while buf.ends_with('\n') {
9165 buf.pop();
9166 }
9167 buf
9168 }
9169
9170 fn call_function(&mut self, name: &str, args: Vec<String>) -> Option<i32> {
9171 // zsh-bundled rename helpers + zcalc: short-circuit BEFORE the
9172 // function/autoload lookup so the autoloaded zsh source (which
9173 // can hang zshrs's parser on zsh-specific syntax) never runs.
9174 // Native Rust impls live in builtin_zmv / builtin_zcalc.
9175 match name {
9176 "zmv" => {
9177 return Some(crate::extensions::ext_builtins::zmv(&args, "mv"));
9178 }
9179 "zcp" => {
9180 return Some(crate::extensions::ext_builtins::zmv(&args, "cp"));
9181 }
9182 "zln" => {
9183 return Some(crate::extensions::ext_builtins::zmv(&args, "ln"));
9184 }
9185 "zcalc" => {
9186 return Some(crate::extensions::ext_builtins::zcalc(&args));
9187 }
9188 // Daemon-managed z* builtins — thin IPC wrappers. Short-circuit BEFORE
9189 // the function-lookup path so a missing daemon doesn't fall through to
9190 // "command not found". The name list is owned by the daemon crate
9191 // (zshrs_daemon::builtins::ZSHRS_BUILTIN_NAMES); routing through
9192 // try_dispatch keeps this site zero-touch as new z* builtins land.
9193 n if crate::daemon::builtins::is_zshrs_builtin(n) => {
9194 let argv: Vec<String> = std::iter::once(name.to_string()).chain(args).collect();
9195 return Some(crate::daemon::builtins::try_dispatch(n, &argv).unwrap_or(1));
9196 }
9197 _ => {}
9198 }
9199
9200 // Alias check first: `alias g='echo hi'; g` rewrites to `echo hi`
9201 // before normal function/external dispatch. The expansion is
9202 // re-parsed + compiled + run on a nested VM with `args` appended.
9203 // Without this branch, aliases would be silently ignored at
9204 // run-time and `g` would fall through to "command not found".
9205 // Skip when this alias is mid-expansion already — zsh's lexer
9206 // disables an alias inside its own body (so `alias ls='ls -la'`
9207 // works without recursion). We do the same via a HashSet guard
9208 // since we expand at run time, not parse time.
9209 // C uses the `alias.inuse` field on the alias node itself
9210 // (`Src/zsh.h:1256` `struct alias { ... int inuse; }`) — the
9211 // lexer bumps it before splicing the body and clears it after,
9212 // so a recursive use within the body sees `inuse != 0` and
9213 // refuses to re-expand. Mirror that here against the canonical
9214 // `aliastab` instead of a side HashSet on ShellExecutor.
9215 let already_expanding = crate::ported::hashtable::aliastab_lock()
9216 .read()
9217 .ok()
9218 .and_then(|tab| tab.get(name).map(|a| a.inuse != 0))
9219 .unwrap_or(false);
9220 let alias_body = if already_expanding {
9221 None
9222 } else {
9223 with_executor(|exec| exec.alias(name))
9224 };
9225 if let Some(body) = alias_body {
9226 let combined = if args.is_empty() {
9227 body
9228 } else {
9229 let quoted: Vec<String> = args
9230 .iter()
9231 .map(|a| {
9232 let escaped = a.replace('\'', "'\\''");
9233 format!("'{}'", escaped)
9234 })
9235 .collect();
9236 format!("{} {}", body, quoted.join(" "))
9237 };
9238 // Bump inuse → run → clear, matching C's lexer behavior.
9239 if let Ok(mut tab) = crate::ported::hashtable::aliastab_lock().write() {
9240 if let Some(a) = tab.get_mut(name) { a.inuse += 1; }
9241 }
9242 let status = with_executor(|exec| exec.execute_script(&combined).unwrap_or(1));
9243 if let Ok(mut tab) = crate::ported::hashtable::aliastab_lock().write() {
9244 if let Some(a) = tab.get_mut(name) { a.inuse = (a.inuse - 1).max(0); }
9245 }
9246 return Some(status);
9247 }
9248
9249 // Resolve to a compiled Chunk:
9250 // 1. Already in functions_compiled → use as-is
9251 // 2. AST-only (sourced / defined earlier) → compile on demand
9252 // 3. Pending autoload → trigger autoload, then retry the AST path
9253 // 4. Available via fpath ZWC scan → autoload via that, then AST path
9254 // 5. Not a function → None so fusevm falls back to host.exec
9255 let chunk = with_executor(|exec| {
9256 // Autoload pending: the legacy stub in self.functions makes
9257 // maybe_autoload / autoload_function were deleted with
9258 // the old exec.c stubs (they were return-false / no-op).
9259 // The autoload dispatch needs a proper port of
9260 // `Src/builtin.c:bin_autoload` + `Src/exec.c:loadautofn`.
9261 // Until that lands, skip the autoload trigger — the eager
9262 // fpath scan below covers the common interactive case.
9263 if let Some(c) = exec.functions_compiled.get(name) {
9264 return Some(c.clone());
9265 }
9266 exec.functions_compiled.get(name).cloned()
9267 });
9268
9269 let chunk = chunk?;
9270
9271 // FUNCNEST recursion guard. zsh enforces a max depth
9272 // (default 500) — past that the call is refused with
9273 // `<name>: maximum nested function level reached; increase
9274 // FUNCNEST?` and exit 1. Without this, `foo() { foo; }; foo`
9275 // overflowed the Rust stack instead of erroring gracefully.
9276 // zshrs's effective ceiling is lower than zsh's: each
9277 // `call_function` recursion consumes ~40KB of Rust stack
9278 // (the bytecode VM is recursive at the host level), so the
9279 // 8MB default stack tops out around ~150 frames. Cap at 100
9280 // by default — users with deeper need can raise FUNCNEST
9281 // explicitly AND run with a larger stack (RUST_MIN_STACK).
9282 let funcnest_limit = with_executor(|exec| {
9283 exec.scalar("FUNCNEST")
9284 .and_then(|s| s.parse::<usize>().ok())
9285 .unwrap_or(100)
9286 });
9287 let cur_depth = with_executor(|exec| exec.local_scope_depth);
9288 if cur_depth >= funcnest_limit {
9289 eprintln!(
9290 "{}: maximum nested function level reached; increase FUNCNEST?",
9291 name
9292 );
9293 return Some(1);
9294 }
9295
9296 // Save and replace positional params, mirror local-scope save/restore
9297 // from the tree-walker `call_function`. The thread-local executor
9298 // pointer set by the outer VM remains valid for the nested VM —
9299 // nested CallBuiltin handlers and host callbacks all see the same
9300 // executor.
9301 let fn_name = name.to_string();
9302 // Snapshot options at function entry. zsh restores these on
9303 // exit when `local_options` is set at that time (per zshmisc
9304 // LOCAL_OPTIONS — `setopt local_options` and `emulate -L
9305 // ...` both arm the restore). Without this, a function that
9306 // does `setopt no_glob` to scope an option leaked the change
9307 // to the caller, breaking p10k/zinit's per-function emulate
9308 // -L sticky-mode pattern.
9309 let saved_options = crate::ported::options::opt_state_snapshot();
9310 let (
9311 saved_params,
9312 saved_zero,
9313 saved_scriptname,
9314 saved_funcstack,
9315 saved_exit_trap,
9316 ) = with_executor(|exec| {
9317 let prev = exec.pparams();
9318 exec.set_pparams(args.clone());
9319 exec.local_scope_depth += 1;
9320 // c:Src/exec.c doshfunc startparamscope() — bump
9321 // canonical locallevel before the function body runs
9322 // so any inner `local`/`typeset` writes Params at the
9323 // right scope. endparamscope at exit restores.
9324 crate::ported::params::locallevel
9325 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
9326 // Save and clear EXIT trap before function body
9327 // runs. Direct port of zsh's exec.c
9328 // `dotrapargs(SIGEXIT, ...)` deferred-fire pattern
9329 // — an EXIT trap set INSIDE a function fires on
9330 // function return (NOT shell exit), and the outer
9331 // EXIT trap is preserved across the call. Without
9332 // this save/restore, `foo() { trap "echo X" EXIT; }`
9333 // either fired X at SHELL exit (if no outer trap)
9334 // or polluted the parent's EXIT trap.
9335 let saved = exec.traps.remove("EXIT");
9336 // zsh's `$0` inside a function returns the function name
9337 // (under the FUNCTION_ARGZERO option, default on). Save
9338 // the previous `$0` and install the function name.
9339 // Anonymous functions get the cosmetic name `(anon)` —
9340 // zshrs's parser synthesizes `_zshrs_anon_N` /
9341 // `_zshrs_anon_kw_N` for `() { … }` and `function { … }`
9342 // so users would see the internal name otherwise.
9343 let display_name = if fn_name.starts_with("_zshrs_anon_") {
9344 "(anon)".to_string()
9345 } else {
9346 fn_name.clone()
9347 };
9348 let prev_zero = crate::ported::params::getsparam("0");
9349 exec.set_scalar("0".to_string(), display_name.clone());
9350 // scriptname: PS4's `%N` and error-message prefix both
9351 // read `exec.scriptname`. Inside a function, C zsh sets
9352 // `scriptname = dupstring(name)` at Src/exec.c:5903 so
9353 // `%N` shows the function name. Save the outer
9354 // scriptname before overwrite; restored on return.
9355 let prev_scriptname = std::mem::replace(
9356 &mut exec.scriptname,
9357 Some(display_name.clone()),
9358 );
9359 // funcstack: prepend the function name; outermost call
9360 // is at the END of the stack per zsh.
9361 let prev_stack = exec.array("funcstack");
9362 let mut new_stack = vec![fn_name.clone()];
9363 if let Some(ref s) = prev_stack {
9364 new_stack.extend_from_slice(s);
9365 }
9366 exec.set_array("funcstack".to_string(), new_stack);
9367 let line_base = exec
9368 .function_line_base
9369 .get(&fn_name)
9370 .copied()
9371 .unwrap_or(0);
9372 let def_file = exec
9373 .function_def_file
9374 .get(&fn_name)
9375 .cloned()
9376 .flatten();
9377 exec.prompt_funcstack
9378 .push((fn_name.clone(), line_base, def_file));
9379 // Set `$_` BEFORE the function body runs. zsh: inside
9380 // a function, `echo $_` reads the function name (when
9381 // called with no args) or the last call-arg.
9382 // Without this, internal builtins that ran before
9383 // (like REGISTER_COMPILED_FN) leaked their last arg
9384 // (the function body source!) as $_.
9385 let dollar_underscore = args.last().cloned().unwrap_or_else(|| fn_name.clone());
9386 exec.set_scalar("_".to_string(), dollar_underscore.clone());
9387 exec.pending_underscore = Some(dollar_underscore);
9388 (
9389 prev,
9390 prev_zero,
9391 prev_scriptname,
9392 prev_stack,
9393 saved,
9394 )
9395 });
9396
9397 crate::fusevm_disasm::maybe_print_stdout(
9398 &format!("host.call_function:{fn_name}"),
9399 &chunk,
9400 );
9401 let mut vm = fusevm::VM::new(chunk);
9402 register_builtins(&mut vm);
9403 // Seed the function-body VM with the parent's `$?` so a
9404 // function that reads `$?` BEFORE running any command sees
9405 // the caller's last status. Direct port of zsh's exec.c
9406 // `execfuncdef`/`doshfunc` semantics — function entry does
9407 // NOT reset `$?`. Without this, `false; foo() { echo $?; }; foo`
9408 // printed 0 instead of 1 because the fresh VM defaulted
9409 // last_status to 0.
9410 vm.last_status = with_executor(|exec| exec.last_status());
9411 let _ = vm.run();
9412 let status = vm.last_status;
9413
9414 // Fire any EXIT trap set INSIDE the function body, then
9415 // restore the outer EXIT trap. zsh fires the function-
9416 // scope EXIT trap BEFORE control returns to the caller,
9417 // so `foo() { trap "echo X" EXIT; }; foo; echo done`
9418 // outputs `X` then `done`. Without this, X never fired
9419 // (or fired at shell exit, polluting unrelated commands).
9420 let inner_exit = with_executor(|exec| exec.traps.remove("EXIT"));
9421 if let Some(action) = inner_exit {
9422 // Run the trap in the current (still-inside-function)
9423 // scope so it sees `$0 == fn_name` etc. Errors are
9424 // swallowed — zsh's trap dispatch tolerates body
9425 // failures.
9426 let _ = with_executor(|exec| {
9427 exec.set_last_status(status);
9428 exec.execute_script_zsh_pipeline(&action)
9429 });
9430 }
9431 // Restore outer EXIT trap (if any).
9432 if let Some(outer) = saved_exit_trap {
9433 with_executor(|exec| {
9434 exec.traps.insert("EXIT".to_string(), outer);
9435 });
9436 }
9437
9438 with_executor(|exec| {
9439 // Set `$_` to the last arg the function was called with
9440 // (or the function name when called with no args). zsh:
9441 // `$_` after `foo arg` is `arg`; after `foo` (no args) is
9442 // `foo`. The function-internal `pop_args` calls polluted
9443 // pending_underscore with internal command args; clear and
9444 // overwrite here so the caller sees the function's call
9445 // form, not internal `return 42` arg.
9446 let last_call_arg = args.last().cloned().unwrap_or_else(|| fn_name.clone());
9447 exec.set_scalar("_".to_string(), last_call_arg.clone());
9448 exec.pending_underscore = Some(last_call_arg);
9449 exec.set_pparams(saved_params);
9450 exec.local_scope_depth -= 1;
9451 // LOCAL_OPTIONS: when set at function exit, restore all
9452 // options to the snapshot taken at entry. `emulate -L`
9453 // arms this; plugin code uses both forms to scope option
9454 // changes inside helpers without leaking to callers.
9455 // Without it, `setopt no_glob` inside a helper polluted
9456 // the caller's option state.
9457 if crate::ported::options::opt_state_get("localoptions").unwrap_or(false) {
9458 // Walk all options touched since entry; reset to snapshot.
9459 let current = crate::ported::options::opt_state_snapshot();
9460 for (k, _) in ¤t {
9461 if !saved_options.contains_key(k) {
9462 crate::ported::options::opt_state_unset(k);
9463 }
9464 }
9465 for (k, v) in &saved_options {
9466 crate::ported::options::opt_state_set(k, *v);
9467 }
9468 }
9469 let _ = exec; // exec still used below for other restores
9470 // Restore `$0`, scriptname, and `$funcstack` to their
9471 // pre-call values. scriptname mirrors C exec.c:5907
9472 // `scriptname = oldscriptname;` after execode returns.
9473 match saved_zero {
9474 Some(v) => {
9475 exec.set_scalar("0".to_string(), v);
9476 }
9477 None => {
9478 exec.unset_scalar("0");
9479 }
9480 }
9481 exec.scriptname = saved_scriptname;
9482 exec.prompt_funcstack.pop();
9483 match saved_funcstack {
9484 Some(s) => {
9485 exec.set_array("funcstack".to_string(), s);
9486 }
9487 None => {
9488 exec.unset_array("funcstack");
9489 }
9490 }
9491 // c:Src/exec.c doshfunc → endparamscope(). Walks paramtab
9492 // restoring Param.old chain for every local declaration
9493 // made during the call.
9494 crate::ported::params::endparamscope();
9495 });
9496
9497 Some(status)
9498 }
9499}
9500
9501// ───────────────────────────────────────────────────────────────────────────
9502// Host-routed shell ops: ShellExecutor methods invoked by ZshrsHost from the
9503// fusevm VM. Not a port of Src/exec.c (see file-level docs above) — they're
9504// the bridge between fusevm opcodes and ShellExecutor state.
9505// ───────────────────────────────────────────────────────────────────────────
9506impl crate::ported::exec::ShellExecutor {
9507 // ─── Host-routed shell ops (called by ZshrsHost from fusevm) ────────────
9508
9509 /// Apply a single redirection. The current scope's saved-fd vec gets a
9510 /// dup of the original fd so it can be restored by `host_redirect_scope_end`.
9511 /// `op_byte` matches `fusevm::op::redirect_op::*`.
9512 /// Apply a file-open result to a redirect fd; on error, emit
9513 /// zsh-format diagnostic, set redirect_failed, sink fd to /dev/null.
9514 /// Shared between WRITE/APPEND/READ/CLOBBER arms in
9515 /// host_apply_redirect to keep the error-handling identical.
9516 fn redir_open_or_fail(
9517 fd: i32,
9518 result: std::io::Result<std::fs::File>,
9519 target: &str,
9520 redirect_failed: &mut bool,
9521 ) -> bool {
9522 match result {
9523 Ok(file) => {
9524 let new_fd = file.into_raw_fd();
9525 unsafe {
9526 libc::dup2(new_fd, fd);
9527 libc::close(new_fd);
9528 }
9529 true
9530 }
9531 Err(e) => {
9532 let msg = match e.kind() {
9533 std::io::ErrorKind::PermissionDenied => "permission denied",
9534 std::io::ErrorKind::NotFound => "no such file or directory",
9535 std::io::ErrorKind::IsADirectory => "is a directory",
9536 _ => "redirect failed",
9537 };
9538 eprintln!("zshrs:1: {}: {}", msg, target);
9539 *redirect_failed = true;
9540 if let Ok(devnull) = std::fs::OpenOptions::new()
9541 .read(true)
9542 .write(true)
9543 .open("/dev/null")
9544 {
9545 let new_fd = devnull.into_raw_fd();
9546 unsafe {
9547 libc::dup2(new_fd, fd);
9548 libc::close(new_fd);
9549 }
9550 }
9551 false
9552 }
9553 }
9554 }
9555
9556 pub fn host_apply_redirect(&mut self, fd: u8, op_byte: u8, target: &str) {
9557 // `&>` / `&>>` always target both fd 1 and fd 2 regardless of the
9558 // fd byte the parser supplied (the lexer's tokfd clamp makes the
9559 // raw value unreliable for these forms).
9560 let fd: i32 = if matches!(op_byte, r::WRITE_BOTH | r::APPEND_BOTH) {
9561 1
9562 } else {
9563 fd as i32
9564 };
9565 let saved = unsafe { libc::dup(fd) };
9566 if saved >= 0 {
9567 if let Some(top) = self.redirect_scope_stack.last_mut() {
9568 top.push((fd, saved));
9569 } else {
9570 // No scope — leave saved fd open and let the next scope
9571 // reclaim it. (Caller without a scope leaks the dup; this
9572 // matches `WithRedirects` parser construction always wrapping.)
9573 unsafe { libc::close(saved) };
9574 }
9575 }
9576 // For `&>` / `&>>` also save fd 2 so the scope restores it after
9577 // the body. Otherwise stderr stays redirected past the command.
9578 if matches!(op_byte, r::WRITE_BOTH | r::APPEND_BOTH) {
9579 let saved2 = unsafe { libc::dup(2) };
9580 if saved2 >= 0 {
9581 if let Some(top) = self.redirect_scope_stack.last_mut() {
9582 top.push((2, saved2));
9583 } else {
9584 unsafe { libc::close(saved2) };
9585 }
9586 }
9587 }
9588 match op_byte {
9589 r::WRITE => {
9590 // Honor `setopt noclobber`: refuse to overwrite an
9591 // existing regular file unless `>!` / `>|` (CLOBBER).
9592 // zsh internally stores the inverted-name `clobber`
9593 // (default ON); `setopt noclobber` writes
9594 // `clobber=false`. Honor both keys.
9595 let noclobber = crate::ported::options::opt_state_get("noclobber").unwrap_or(false)
9596 || !crate::ported::options::opt_state_get("clobber").unwrap_or(true);
9597 if noclobber && std::path::Path::new(target).exists() {
9598 eprintln!("zshrs:1: file exists: {}", target);
9599 self.set_last_status(1);
9600 // Sink the upcoming command's stdout to /dev/null
9601 // so we don't leak its output to the terminal.
9602 // zsh skips the command entirely; we approximate by
9603 // discarding the output (the redirect target was
9604 // the user's chosen sink, but with noclobber the
9605 // file is protected — discarding matches the
9606 // user's intent better than printing to terminal).
9607 if let Ok(file) = std::fs::OpenOptions::new().write(true).open("/dev/null") {
9608 let new_fd = file.into_raw_fd();
9609 unsafe {
9610 libc::dup2(new_fd, fd);
9611 libc::close(new_fd);
9612 }
9613 }
9614 return;
9615 }
9616 if !Self::redir_open_or_fail(
9617 fd,
9618 std::fs::File::create(target),
9619 target,
9620 &mut self.redirect_failed,
9621 ) {
9622 self.set_last_status(1);
9623 }
9624 }
9625 r::CLOBBER => {
9626 if !Self::redir_open_or_fail(
9627 fd,
9628 std::fs::File::create(target),
9629 target,
9630 &mut self.redirect_failed,
9631 ) {
9632 self.set_last_status(1);
9633 }
9634 }
9635 r::APPEND => {
9636 if !Self::redir_open_or_fail(
9637 fd,
9638 std::fs::OpenOptions::new()
9639 .create(true)
9640 .append(true)
9641 .open(target),
9642 target,
9643 &mut self.redirect_failed,
9644 ) {
9645 self.set_last_status(1);
9646 }
9647 }
9648 r::READ => {
9649 if !Self::redir_open_or_fail(
9650 fd,
9651 std::fs::File::open(target),
9652 target,
9653 &mut self.redirect_failed,
9654 ) {
9655 self.set_last_status(1);
9656 }
9657 }
9658 r::READ_WRITE => {
9659 if let Ok(file) = std::fs::OpenOptions::new()
9660 .create(true)
9661 .truncate(false) // <> opens existing-or-new without truncating
9662 .read(true)
9663 .write(true)
9664 .open(target)
9665 {
9666 let new_fd = file.into_raw_fd();
9667 unsafe {
9668 libc::dup2(new_fd, fd);
9669 libc::close(new_fd);
9670 }
9671 }
9672 }
9673 r::DUP_READ | r::DUP_WRITE => {
9674 // Target is a numeric fd reference like `&3`. The parser
9675 // strips the `&` prefix before we get here in some paths,
9676 // others retain it — accept both. Also support `-` for
9677 // close-fd (`<&-` / `>&-`) per POSIX.
9678 let n = target.trim_start_matches('&');
9679 if n == "-" {
9680 unsafe { libc::close(fd) };
9681 } else if let Ok(src_fd) = n.parse::<i32>() {
9682 unsafe { libc::dup2(src_fd, fd) };
9683 } else {
9684 tracing::warn!(target = %target, "DUP redir: target not parseable as fd");
9685 }
9686 }
9687 r::WRITE_BOTH => {
9688 if let Ok(file) = std::fs::File::create(target) {
9689 let new_fd = file.into_raw_fd();
9690 unsafe {
9691 libc::dup2(new_fd, 1);
9692 libc::dup2(new_fd, 2);
9693 libc::close(new_fd);
9694 }
9695 }
9696 }
9697 r::APPEND_BOTH => {
9698 if let Ok(file) = std::fs::OpenOptions::new()
9699 .create(true)
9700 .append(true)
9701 .open(target)
9702 {
9703 let new_fd = file.into_raw_fd();
9704 unsafe {
9705 libc::dup2(new_fd, 1);
9706 libc::dup2(new_fd, 2);
9707 libc::close(new_fd);
9708 }
9709 }
9710 }
9711 _ => {}
9712 }
9713 }
9714
9715 /// Push a fresh redirect scope. `_count` is informational — the actual
9716 /// saved fds are appended by host_apply_redirect into the top scope.
9717 pub fn host_redirect_scope_begin(&mut self, _count: u8) {
9718 self.redirect_scope_stack.push(Vec::new());
9719 }
9720
9721 /// Pop the top redirect scope, restoring saved fds.
9722 pub fn host_redirect_scope_end(&mut self) {
9723 if let Some(saved) = self.redirect_scope_stack.pop() {
9724 for (fd, saved_fd) in saved.into_iter().rev() {
9725 unsafe {
9726 libc::dup2(saved_fd, fd);
9727 libc::close(saved_fd);
9728 }
9729 }
9730 }
9731 }
9732
9733 /// Set up `content` as stdin (fd 0) for the next command via a real pipe.
9734 /// Used by `Op::HereDoc(idx)` and `Op::HereString`.
9735 ///
9736 /// The pattern: dup2 the read end of a fresh pipe onto fd 0, save the
9737 /// original fd 0 into the active redirect scope so `WithRedirectsEnd`
9738 /// restores it, and spawn a thread that writes `content` to the write end
9739 /// and closes it (so the consumer sees EOF after the body). A thread is
9740 /// needed because writing could block on a finite pipe buffer.
9741 pub fn host_set_pending_stdin(&mut self, content: String) {
9742 let (read_end, write_end) = match os_pipe::pipe() {
9743 Ok(p) => p,
9744 Err(_) => return,
9745 };
9746 let saved = unsafe { libc::dup(libc::STDIN_FILENO) };
9747 if saved >= 0 {
9748 if let Some(top) = self.redirect_scope_stack.last_mut() {
9749 top.push((libc::STDIN_FILENO, saved));
9750 } else {
9751 unsafe { libc::close(saved) };
9752 }
9753 }
9754 let read_fd = std::os::unix::io::AsRawFd::as_raw_fd(&read_end);
9755 unsafe { libc::dup2(read_fd, libc::STDIN_FILENO) };
9756 drop(read_end);
9757 std::thread::spawn(move || {
9758 let mut w = write_end;
9759 let _ = w.write_all(content.as_bytes());
9760 });
9761 }
9762
9763 /// Spawn an external command using zshrs's full dispatch logic
9764 /// (intercepts, command_hash, redirect handling). Used by
9765 /// `ZshrsHost::exec` so the bytecode VM's `Op::Exec` and
9766 /// `Op::CallFunction` external fallback get the same semantics as
9767 /// the tree-walker's `execute_external` rather than a plain
9768 /// `Command::new` shortcut. Returns the exit status.
9769 pub fn host_exec_external(&mut self, args: &[String]) -> i32 {
9770 // If a glob expansion in this command's argv triggered the
9771 // nomatch error path, suppress the actual exec and return
9772 // status 1 — mirrors zsh's command-aborted-on-glob-error
9773 // behaviour. The flag is reset BEFORE returning so the next
9774 // command starts clean.
9775 if self.current_command_glob_failed.get() {
9776 self.current_command_glob_failed.set(false);
9777 self.set_last_status(1);
9778 return 1;
9779 }
9780 let Some((cmd, rest)) = args.split_first() else {
9781 return 0;
9782 };
9783 // Empty command name (e.g. result of an empty `$(false)`
9784 // command-sub being the only word) — zsh: no command runs,
9785 // exit status preserved from prior step. Was hitting the
9786 // "command not found: " path with empty name.
9787 if cmd.is_empty() && rest.is_empty() {
9788 return self.last_status();
9789 }
9790 let rest_vec: Vec<String> = rest.to_vec();
9791 // Update `$_` with the just-arriving argv so the next command
9792 // reads `_=<last_arg>`. Mirrors C zsh's writeback in
9793 // `execcmd_exec` (Src/exec.c). Per `args.last()` semantics,
9794 // when invoked as `cmd a b c`, `$_` becomes "c" — for a bare
9795 // command with no args, `$_` becomes the command name itself.
9796 crate::ported::params::set_zunderscore(args);
9797
9798 // Builtins not in fusevm's name→id table fall through to
9799 // host.exec. Catch them here before the OS-level exec attempts
9800 // to spawn a non-existent binary.
9801 match cmd.as_str() {
9802 "sched" => return self.bin_sched(&rest_vec),
9803 "echotc" => return crate::fusevm_bridge::dispatch_builtin("echotc", rest_vec.clone()),
9804 "echoti" => return crate::fusevm_bridge::dispatch_builtin("echoti", rest_vec.clone()),
9805 // "getln" handler deleted with its stub.
9806 "zpty" => return crate::fusevm_bridge::dispatch_builtin("zpty", rest_vec.clone()),
9807 "ztcp" => return crate::fusevm_bridge::dispatch_builtin("ztcp", rest_vec.clone()),
9808 "zsocket" => {
9809 // Shim — parses the BUILTIN spec "ad:ltv" from
9810 // socket.c:276 into a real `options` struct, then
9811 // invokes the canonical free-fn port at
9812 // crate::ported::modules::socket::bin_zsocket whose
9813 // signature matches C `bin_zsocket(nam, args, ops,
9814 // func)` exactly.
9815 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9816 argscount: 0, argsalloc: 0 };
9817 let mut positional: Vec<String> = Vec::new();
9818 let mut i = 0;
9819 while i < rest_vec.len() {
9820 let a = &rest_vec[i];
9821 if a == "--" {
9822 i += 1;
9823 positional.extend_from_slice(&rest_vec[i..]);
9824 break;
9825 }
9826 if let Some(rest) = a.strip_prefix('-') {
9827 if rest.is_empty() { positional.push(a.clone()); i += 1; continue; }
9828 let chars: Vec<char> = rest.chars().collect();
9829 let mut j = 0;
9830 while j < chars.len() {
9831 let c = chars[j] as u8;
9832 if c == b'd' {
9833 ops.ind[c as usize] = (ops.args.len() + 1) as u8;
9834 let rest_after = &rest[j + 1..];
9835 if !rest_after.is_empty() {
9836 ops.args.push(rest_after.to_string());
9837 } else {
9838 i += 1;
9839 ops.args.push(rest_vec.get(i).cloned().unwrap_or_default());
9840 }
9841 ops.argscount = ops.args.len() as i32;
9842 break;
9843 }
9844 if c.is_ascii_alphabetic() { ops.ind[c as usize] = 1; }
9845 j += 1;
9846 }
9847 } else {
9848 positional.push(a.clone());
9849 }
9850 i += 1;
9851 }
9852 return bin_zsocket("zsocket", &positional, &ops, 0);
9853 }
9854 "private" => {
9855 // bin_private now takes the canonical C signature
9856 // (name, args, ops, func, assigns) per Src/Modules/
9857 // param_private.c:217.
9858 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9859 argscount: 0, argsalloc: 0 };
9860 let mut assigns: Vec<(String, String)> = Vec::new();
9861 return crate::modules::param_private::bin_private("private",
9862 &rest_vec, &mut ops, 0, &mut assigns);
9863 }
9864 "zformat" => return crate::fusevm_bridge::dispatch_builtin("zformat", rest_vec.clone()),
9865 "zregexparse" => return crate::fusevm_bridge::dispatch_builtin("zregexparse", rest_vec.clone()),
9866 // `unalias`/`unhash`/`unfunction` share `bin_unhash` but
9867 // each carries its own funcid (BIN_UNALIAS / BIN_UNHASH /
9868 // BIN_UNFUNCTION) in the BUILTINS table. Route through
9869 // execbuiltin so the correct funcid + optstr propagate —
9870 // without this `unalias` was a silent no-op.
9871 "unalias" | "unhash" | "unfunction" => {
9872 // Fallback when fusevm doesn't have a BUILTIN_*
9873 // opcode registered for the name (e.g. shell-builtin
9874 // table mismatch). Route through execbuiltin with the
9875 // correct entry from BUILTINS.
9876 let bn_idx = crate::ported::builtin::BUILTINS.iter()
9877 .position(|b| b.node.nam == cmd.as_str());
9878 if let Some(idx) = bn_idx {
9879 let bn_static: &'static crate::ported::zsh_h::builtin =
9880 &crate::ported::builtin::BUILTINS[idx];
9881 let bn_ptr = bn_static as *const _ as *mut _;
9882 return crate::ported::builtin::execbuiltin(
9883 rest_vec, Vec::new(), bn_ptr);
9884 }
9885 return 1;
9886 }
9887 // zsh-bundled rename helpers — implemented natively in
9888 // Rust so `autoload -U zmv` works without shipping the
9889 // function source. (Without this, the autoload path hangs.)
9890 "zmv" => return crate::extensions::ext_builtins::zmv(&rest_vec, "mv"),
9891 "zcp" => return crate::extensions::ext_builtins::zmv(&rest_vec, "cp"),
9892 "zln" => return crate::extensions::ext_builtins::zmv(&rest_vec, "ln"),
9893 "zcalc" => return crate::extensions::ext_builtins::zcalc(&rest_vec),
9894 "zselect" => {
9895 // Canonical bin_zselect per zselect.c:65 takes
9896 // (nam, args, ops, func); the C source parses its
9897 // own option string inline, so an empty Options is
9898 // sufficient at this call site.
9899 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9900 argscount: 0, argsalloc: 0 };
9901 return crate::ported::modules::zselect::bin_zselect(
9902 "zselect", &rest_vec, &ops, 0);
9903 }
9904 "cap" => return crate::fusevm_bridge::dispatch_builtin("cap", rest_vec.clone()),
9905 "getcap" => return crate::fusevm_bridge::dispatch_builtin("getcap", rest_vec.clone()),
9906 "setcap" => return crate::fusevm_bridge::dispatch_builtin("setcap", rest_vec.clone()),
9907 "yes" => return self.builtin_yes(&rest_vec),
9908 "nl" => return self.builtin_nl(&rest_vec),
9909 "env" => return self.builtin_env(&rest_vec),
9910 "printenv" => return self.builtin_printenv(&rest_vec),
9911 "tty" => return self.builtin_tty(&rest_vec),
9912 "chgrp" => {
9913 // Canonical bin_chown per files.c:725 with func=BIN_CHGRP
9914 // per the bintab entry at c:805. BUILTIN spec "hRs".
9915 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9916 argscount: 0, argsalloc: 0 };
9917 let mut positional: Vec<String> = Vec::new();
9918 let mut i = 0;
9919 while i < rest_vec.len() {
9920 let a = &rest_vec[i];
9921 if a == "--" { i += 1; positional.extend_from_slice(&rest_vec[i..]); break; }
9922 if let Some(rest) = a.strip_prefix('-') {
9923 if rest.is_empty() { positional.push(a.clone()); i += 1; continue; }
9924 for c in rest.chars() {
9925 let cb = c as u8;
9926 if cb.is_ascii_alphabetic() { ops.ind[cb as usize] = 1; }
9927 }
9928 } else {
9929 positional.push(a.clone());
9930 }
9931 i += 1;
9932 }
9933 return crate::ported::modules::files::bin_chown(
9934 "chgrp", &positional, &ops,
9935 crate::ported::modules::files::BIN_CHGRP);
9936 }
9937 "nproc" => return self.builtin_nproc(&rest_vec),
9938 "expr" => return self.builtin_expr(&rest_vec),
9939 "sha256sum" => return self.builtin_sha256sum(&rest_vec),
9940 "base64" => return self.builtin_base64(&rest_vec),
9941 "tac" => return self.builtin_tac(&rest_vec),
9942 "expand" => return self.builtin_expand(&rest_vec),
9943 "unexpand" => return self.builtin_unexpand(&rest_vec),
9944 "paste" => return self.builtin_paste(&rest_vec),
9945 "fold" => return self.builtin_fold(&rest_vec),
9946 "shuf" => return self.builtin_shuf(&rest_vec),
9947 "comm" => return self.builtin_comm(&rest_vec),
9948 "cksum" => return self.builtin_cksum(&rest_vec),
9949 "factor" => return self.builtin_factor(&rest_vec),
9950 "tsort" => return self.builtin_tsort(&rest_vec),
9951 "sum" => return self.builtin_sum(&rest_vec),
9952 "mkfifo" => return self.builtin_mkfifo(&rest_vec),
9953 "link" => return self.builtin_link(&rest_vec),
9954 "unlink" => return self.builtin_unlink(&rest_vec),
9955 "dircolors" => return self.builtin_dircolors(&rest_vec),
9956 "groups" => return self.builtin_groups(&rest_vec),
9957 "arch" => return self.builtin_arch(&rest_vec),
9958 "nice" => return self.builtin_nice(&rest_vec),
9959 "logname" => return self.builtin_logname(&rest_vec),
9960 "tput" => return self.builtin_tput(&rest_vec),
9961 "users" => return self.builtin_users(&rest_vec),
9962 // "sync" => return self.bin_sync(&rest_vec),
9963 "zbuild" => return self.builtin_zbuild(&rest_vec),
9964 // `zf_*` aliases from `zsh/files` (Src/Modules/files.c
9965 // BUILTIN table at line 816-824). The C source binds
9966 // both unprefixed (`chmod`) and prefixed (`zf_chmod`)
9967 // names to the SAME `bin_chmod` etc. handlers — the
9968 // prefixed forms exist so a script can portably reach
9969 // the builtin even when a function or alias has shadowed
9970 // the bare name. Each arm routes through the canonical
9971 // free-fn port of Src/Modules/files.c, parsing the BUILTIN
9972 // optstr inline since the framework doesn't pre-parse.
9973 "zf_mkdir" | "mkdir" => {
9974 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
9975 argscount: 0, argsalloc: 0 };
9976 let mut positional: Vec<String> = Vec::new();
9977 let mut i = 0;
9978 while i < rest_vec.len() {
9979 let a = &rest_vec[i];
9980 if a == "--" {
9981 i += 1;
9982 positional.extend_from_slice(&rest_vec[i..]);
9983 break;
9984 }
9985 if let Some(rest) = a.strip_prefix('-') {
9986 if rest.is_empty() { positional.push(a.clone()); i += 1; continue; }
9987 let chars: Vec<char> = rest.chars().collect();
9988 let mut j = 0;
9989 while j < chars.len() {
9990 let c = chars[j] as u8;
9991 if c == b'm' {
9992 ops.ind[c as usize] = (ops.args.len() + 1) as u8;
9993 let rest_after = &rest[j + 1..];
9994 if !rest_after.is_empty() {
9995 ops.args.push(rest_after.to_string());
9996 } else {
9997 i += 1;
9998 ops.args.push(rest_vec.get(i).cloned().unwrap_or_default());
9999 }
10000 ops.argscount = ops.args.len() as i32;
10001 break;
10002 }
10003 if c.is_ascii_alphabetic() { ops.ind[c as usize] = 1; }
10004 j += 1;
10005 }
10006 } else {
10007 positional.push(a.clone());
10008 }
10009 i += 1;
10010 }
10011 return crate::ported::modules::files::bin_mkdir(
10012 cmd, &positional, &ops, 0);
10013 }
10014 "zf_rm" => {
10015 let mut ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
10016 argscount: 0, argsalloc: 0 };
10017 let mut positional: Vec<String> = Vec::new();
10018 let mut i = 0;
10019 while i < rest_vec.len() {
10020 let a = &rest_vec[i];
10021 if a == "--" {
10022 i += 1;
10023 positional.extend_from_slice(&rest_vec[i..]);
10024 break;
10025 }
10026 if let Some(rest) = a.strip_prefix('-') {
10027 if rest.is_empty() { positional.push(a.clone()); i += 1; continue; }
10028 for c in rest.chars() {
10029 let cb = c as u8;
10030 if cb.is_ascii_alphabetic() { ops.ind[cb as usize] = 1; }
10031 }
10032 } else {
10033 positional.push(a.clone());
10034 }
10035 i += 1;
10036 }
10037 return crate::ported::modules::files::bin_rm(
10038 "zf_rm", &positional, &ops, 0);
10039 }
10040 "zf_rmdir" => {
10041 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
10042 argscount: 0, argsalloc: 0 };
10043 return crate::ported::modules::files::bin_rmdir(
10044 "zf_rmdir", &rest_vec, &ops, 0);
10045 }
10046 // `zstat` — port of zsh/stat module (Src/Modules/stat.c
10047 // BUILTIN("zstat", …)). Returns file metadata as
10048 // `field value` pairs / an assoc / a plus-separated
10049 // list depending on flags. zsh ALSO registers `stat`
10050 // bound to the same handler, but that name conflicts
10051 // with the system `stat(1)` binary (every script that
10052 // calls `stat -f '%Lp' …` would break). zsh resolves
10053 // this through opt-in `zmodload`; zshrs's modules are
10054 // statically linked so we keep `stat` routing to the
10055 // external command and only intercept the unambiguous
10056 // `zstat` name.
10057 "zstat" => {
10058 // bin_stat now takes the canonical C signature
10059 // (name, args, ops, func) per Src/Modules/stat.c:368.
10060 let ops = options { ind: [0u8; MAX_OPS], args: Vec::new(),
10061 argscount: 0, argsalloc: 0 };
10062 return crate::modules::stat::bin_stat("zstat", &rest_vec, &ops, 0);
10063 }
10064 _ => {}
10065 }
10066
10067 // AOP intercepts: when an `intercept :before/:around/:after foo` block
10068 // is registered, dynamic-command-name dispatch must consult it before
10069 // spawning. Without this, `cmd=ls; $cmd` bypasses every intercept that
10070 // a literal `ls` would trigger. The full_cmd string mirrors what the
10071 // tree-walker era passed (cmd + args joined by space) so existing
10072 // pattern matchers continue to work.
10073 if !self.intercepts.is_empty() {
10074 let full_cmd = if rest_vec.is_empty() {
10075 cmd.clone()
10076 } else {
10077 format!("{} {}", cmd, rest_vec.join(" "))
10078 };
10079 if let Some(intercept_result) = self.run_intercepts(cmd, &full_cmd, &rest_vec) {
10080 return intercept_result.unwrap_or(127);
10081 }
10082 }
10083
10084 // User-defined function lookup before OS-level exec. zsh's
10085 // dynamic-command-name dispatch (`cmd=hook1; $cmd`) checks
10086 // the function table FIRST — without this, `$f` for a
10087 // function-name `f` was always falling through to
10088 // `execute_external` and erroring "command not found".
10089 // Plugin code uses this pattern constantly:
10090 // for f in "${precmd_functions[@]}"; do "$f"; done
10091 if self.function_exists(cmd) {
10092 if let Some(status) = self.dispatch_function_call(cmd, &rest_vec) {
10093 return status;
10094 }
10095 }
10096
10097 self.execute_external(cmd, &rest_vec, &[]).unwrap_or(127)
10098 }
10099}