zshrs 0.11.5

The first compiled Unix shell — bytecode VM, worker pool, AOP intercept, Rkyv caching
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
//! `zsh/regex` module — direct port of `Src/Modules/regex.c`.
//!
//! Provides the `-regex-match` infix condition usable inside
//! `[[ … ]]`:
//!
//! ```text
//! [[ "$str" -regex-match "$pattern" ]]
//! ```
//!
//! On match, the cond op writes `$MATCH` / `$MBEGIN` / `$MEND`
//! plus `$match[1..N]` / `$mbegin[1..N]` / `$mend[1..N]` (or
//! `$BASH_REMATCH` if `BASHREMATCH` is set), exactly as the
//! C source does at regex.c:97-185.
//!
//! The C source has zero `struct ...` / `enum ...` definitions
//! (uses libc's `regex_t` / `regmatch_t` directly). Rust port
//! matches: zero types.

/// `ZREGEX_EXTENDED` from `Src/Modules/regex.c:36`
/// (`#define ZREGEX_EXTENDED 0`).
///
/// Flavour id passed as the `id` argument of `zcond_regex_match`.
/// It is the only id that function accepts; any other value makes
/// it return 0 without attempting a match.
pub const ZREGEX_EXTENDED: i32 = 0;                                      // c:36

/// Port of the static helper `zregex_regerrwarn()` from
/// `Src/Modules/regex.c:40`.
///
/// C signature: `static void zregex_regerrwarn(int r, regex_t *re, char *msg)`.
///
/// The C helper calls libc `regerror(3)` twice (once to size the
/// buffer, once to fill it) and then emits the text via `zwarn`.
/// The Rust port relies on the `regex` crate, whose `regex::Error`
/// already renders to a message, so this function only forwards.
///
/// * `prefix` — name passed as the first argument of `zwarnnam`.
/// * `err_msg` — the already-formatted warning text.
pub fn zregex_regerrwarn(prefix: &str, err_msg: &str) {                  // c:40
    use crate::ported::utils::zwarnnam;

    // One forwarding call replaces C's size + fill + zwarn sequence.
    zwarnnam(prefix, err_msg);                                           // c:40
}

/// Port of `zcond_regex_match(char **a, int id)` from `Src/Modules/regex.c:54`.
///
/// C signature: `static int zcond_regex_match(char **a, int id)`.
///
/// # Arguments
///
/// * `a` — the cond-op argv: `a[0]` is the LHS string, `a[1]` is
///   the RHS pattern (matching C's `cond_str(a, 0, 0)` /
///   `cond_str(a, 1, 0)` reads at regex.c:62-63).
/// * `id` — regex flavour; only [`ZREGEX_EXTENDED`] is accepted.
///
/// # Returns
///
/// 1 on match. 0 when fewer than two operands are supplied, when
/// `id` is not `ZREGEX_EXTENDED`, when the pattern fails to compile
/// (a warning carrying the compiler's message is emitted through
/// [`zregex_regerrwarn`]), or when the pattern does not match.
///
/// # Parameter writeback (regex.c:96-185)
///
/// * With `BASHREMATCH` set: only `$BASH_REMATCH` is written — the
///   whole match followed by every capture group.
/// * Otherwise: `$MATCH`, `$MBEGIN`, `$MEND`, and — when the pattern
///   has at least one capture group — `$match`, `$mbegin`, `$mend`.
///
/// Offsets are counted in characters (not bytes) and are 1-based
/// unless `KSHARRAYS` is set. A group that did not participate in
/// the match contributes an empty string to `$match` and `-1` to
/// `$mbegin` / `$mend`. Array values are handed to `setsparam` as a
/// single `:`-joined string.
pub fn zcond_regex_match(a: &[&str], id: i32) -> i32 {                       // c:54
    if a.len() < 2 {
        return 0;
    }
    let lhstr = a[0];                                                    // c:62 cond_str(a,0,0)
    let rhre = a[1];                                                     // c:63 cond_str(a,1,0)

    // c:73-77 — switch(id). Only ZREGEX_EXTENDED is defined.
    if id != ZREGEX_EXTENDED {
        // c:188-191 default: DPUTS("bad regex option"); goto CLEAN.
        return 0;
    }

    // c:74-76 — flag computation. POSIX REG_EXTENDED is implicit
    // in Rust's regex crate (RE2 syntax is extended-by-default);
    // CASEMATCH off → REG_ICASE → wrap with `(?i)`.
    let casematch = crate::ported::zsh_h::isset(crate::ported::options::optlookup("casematch"));
    let pat_for_compile = if casematch {
        rhre.to_string()
    } else {
        format!("(?i){}", rhre)                                          // c:75-76 REG_ICASE
    };

    // c:78 — regcomp(&re, rhre, rcflags).
    let re = match regex::Regex::new(&pat_for_compile) {
        Ok(r) => r,
        Err(err) => {                                                    // c:79-81
            // C's zregex_regerrwarn emits "<msg>: <regerror text>";
            // keep the compiler's explanation instead of dropping it
            // so the user can see what is wrong with the pattern.
            let msg = format!("failed to compile regex: {}", err);
            zregex_regerrwarn("-regex-match", &msg);
            return 0;                                                    // c:81 break;
        }
    };

    // c:92 — regexec.
    let captures = match re.captures(lhstr) {
        Some(c) => c,
        None => return 0,                                                // c:93-94 REG_NOMATCH
    };

    // `captures_len()` counts group 0, so subtract it to get re_nsub.
    let nsub = re.captures_len() - 1;
    let bashre = crate::ported::zsh_h::isset(crate::ported::options::optlookup("bashrematch"));
    let ksharr = crate::ported::zsh_h::isset(crate::ported::options::optlookup("ksharrays"));

    // c:97-103 — BASH_REMATCH includes group 0; $match starts at 1.
    let start: usize = if bashre { 0 } else { 1 };

    // c:108-112 — build arr (the $match / $BASH_REMATCH array).
    // Unparticipated groups become empty strings.
    let arr: Vec<String> = (start..=nsub)                                // c:109
        .map(|n| {
            captures
                .get(n)
                .map_or_else(String::new, |m| m.as_str().to_string())    // c:110 metafy
        })
        .collect();

    if bashre {                                                          // c:115
        // c:116 — `assignaparam("BASH_REMATCH", arr, 0);`
        crate::ported::params::setsparam("BASH_REMATCH", &arr.join(":"));
        return 1;
    }

    // c:134 — offsets are 1-based unless KSHARRAYS is set.
    let kshoff: i64 = if ksharr { 0 } else { 1 };

    // c:124-145 / c:163-178 — convert a byte span of `lhstr` into
    // character-based (begin, end) offsets. C walks the bytes with
    // MB_CHARLEN; `chars().count()` handles UTF-8 boundaries here.
    // The regex crate only reports spans on char boundaries, so the
    // slices cannot split a code point.
    let char_span = |so: usize, eo: usize| -> (i64, i64) {
        let begin_chars = lhstr[..so].chars().count() as i64;
        let len_chars = lhstr[so..eo].chars().count() as i64;
        (begin_chars + kshoff, begin_chars + len_chars + kshoff - 1)
    };

    // c:119-121 — assignsparam("MATCH", full-match-text).
    let m0 = captures.get(0).expect("regex matched but no group 0");
    let full = m0.as_str().to_string();                                  // c:120 metafy
    crate::ported::params::setsparam("MATCH", &full);                    // c:121 assignsparam

    // c:124-145 — MBEGIN / MEND for the whole match.
    let (mbegin, mend) = char_span(m0.start(), m0.end());
    crate::ported::params::setiparam("MBEGIN", mbegin);                  // c:134 assigniparam
    crate::ported::params::setiparam("MEND", mend);                      // c:145 assigniparam

    // c:147-180 — populate $match[], $mbegin[], $mend[] subgroup
    // arrays; skipped entirely when the pattern has no groups.
    if nsub > 0 {                                                        // c:147
        let mut mbegin_arr: Vec<String> = Vec::with_capacity(nsub);
        let mut mend_arr: Vec<String> = Vec::with_capacity(nsub);
        for n in 1..=nsub {                                              // c:152
            match captures.get(n) {                                      // c:158
                Some(m) => {
                    let (beg, end) = char_span(m.start(), m.end());
                    mbegin_arr.push(beg.to_string());                    // c:172
                    mend_arr.push(end.to_string());                      // c:178
                }
                None => {                                                // c:159-162 — unparticipated group
                    mbegin_arr.push("-1".to_string());
                    mend_arr.push("-1".to_string());
                }
            }
        }
        // c:182-184 — `setaparam("match"/"mbegin"/"mend", ...);`
        crate::ported::params::setsparam("match",  &arr.join(":"));
        crate::ported::params::setsparam("mbegin", &mbegin_arr.join(":"));
        crate::ported::params::setsparam("mend",   &mend_arr.join(":"));
    }

    1                                                                    // c:96 / c:200
}



// =====================================================================
// static struct features module_features                            c:217 (regex.c)
// =====================================================================

use crate::ported::zsh_h::module;

// `cotab` — port of `static struct conddef cotab[]` (regex.c).


// `module_features` — port of `static struct features module_features`
// from regex.c:217.



/// Port of `setup_(UNUSED(Module m))` from `Src/Modules/regex.c:229`.
///
/// Module setup entry point. The `m` handle is not read and the
/// function always returns 0.
#[allow(unused_variables)]
pub fn setup_(m: *const module) -> i32 {                                    // c:229
    // C body c:231-232 — `return 0`. Faithful empty-body port.
    0
}

/// Port of `features_` from `Src/Modules/regex.c:236`.
///
/// Overwrites `features` with the feature-name list produced by
/// this file's `featuresarray` shim for the module's feature table,
/// then returns 0.
pub fn features_(m: *const module, features: &mut Vec<String>) -> i32 {
    let table = module_features();
    let names = featuresarray(m, table);
    *features = names;
    0
}

/// Port of `enables_` from `Src/Modules/regex.c:244`.
///
/// Delegates to this file's `handlefeatures` shim with the module's
/// feature table and returns its status code unchanged.
pub fn enables_(m: *const module, enables: &mut Option<Vec<i32>>) -> i32 {
    let table = module_features();
    handlefeatures(m, table, enables)
}

/// Port of `boot_(UNUSED(Module m))` from `Src/Modules/regex.c:251`.
///
/// Module boot entry point. The `m` handle is not read and the
/// function always returns 0.
#[allow(unused_variables)]
pub fn boot_(m: *const module) -> i32 {                                     // c:251
    // C body c:253-254 — `return 0`. Faithful empty-body port; the
    //                    regex-match condition registers via cd_list.
    0
}

/// Port of `cleanup_` from `Src/Modules/regex.c:258`.
///
/// Calls this file's `setfeatureenables` shim with no enables list
/// (`None`) for the module's feature table and returns its status.
pub fn cleanup_(m: *const module) -> i32 {
    let table = module_features();
    setfeatureenables(m, table, None)
}

/// Port of `finish_(UNUSED(Module m))` from `Src/Modules/regex.c:265`.
///
/// Module finish entry point. The `m` handle is not read and the
/// function always returns 0.
#[allow(unused_variables)]
pub fn finish_(m: *const module) -> i32 {                                   // c:265
    // C body c:267-268 — `return 0`. Faithful empty-body port.
    0
}

use crate::ported::zsh_h::features as features_t;
use std::sync::{Mutex, OnceLock};

/// Backing storage for this module's feature table. Starts empty
/// and is filled on first access by `module_features()`.
static MODULE_FEATURES: OnceLock<Mutex<features_t>> = OnceLock::new();


// WARNING: NOT IN REGEX.C — Rust-only module-framework shim.
// C uses the generic `featuresarray`/`handlefeatures`/
// `setfeatureenables` from Src/module.c:3275/3370/3445, which take
// `Builtin` + `Features` pointer fields the Rust port doesn't carry.
// The per-module shims therefore hardcode the descriptor list that
// mirrors the C bintab/conddefs/mathfuncs/paramdefs.
//
// Returns the module's feature names: a single entry,
// `c:regex-match`. Both arguments are ignored.
fn featuresarray(_m: *const module, _f: &Mutex<features_t>) -> Vec<String> {
    ["c:regex-match"]
        .iter()
        .map(|name| name.to_string())
        .collect()
}

// WARNING: NOT IN REGEX.C — Rust-only module-framework shim.
// C uses generic featuresarray/handlefeatures/setfeatureenables from
// Src/module.c:3275/3370/3445 with C-side Builtin/Features pointers;
// Rust per-module shims hardcode the bintab/conddefs/mathfuncs/paramdefs.
//
// When the caller supplies no enables list, fills in a one-element
// list `[1]`; an existing list is left untouched. `_m` and `_f` are
// ignored. Always returns 0.
fn handlefeatures(
    _m: *const module,
    _f: &Mutex<features_t>,
    enables: &mut Option<Vec<i32>>,
) -> i32 {
    // Only the `None` case is populated; the returned reference is
    // not needed here.
    let _ = enables.get_or_insert_with(|| vec![1]);
    0
}

// WARNING: NOT IN REGEX.C — Rust-only module-framework shim.
// C uses generic featuresarray/handlefeatures/setfeatureenables from
// Src/module.c:3275/3370/3445 with C-side Builtin/Features pointers;
// Rust per-module shims hardcode the bintab/conddefs/mathfuncs/paramdefs.
//
// This shim ignores all three arguments and always returns 0.
fn setfeatureenables(
    _m: *const module,
    _f: &Mutex<features_t>,
    _e: Option<&[i32]>,
) -> i32 {
    0
}

// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// ─── RUST-ONLY ACCESSORS ───
//
// Singleton accessor fns for the `OnceLock<Mutex<T>>` / `OnceLock<
// RwLock<T>>` globals declared above. C zsh accesses its globals
// directly; the Rust port goes through these wrappers because the
// shared state is constructed lazily via `OnceLock::get_or_init`.
// The fns sit here so the body of this file reads in C source order
// without accessor wrappers interleaved between the real port fns.
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

// WARNING: NOT IN REGEX.C — Rust-only accessor.
// C references its `static struct features module_features`
// (regex.c:217) directly; the Rust port keeps the table in the
// `MODULE_FEATURES` `OnceLock` and builds it on first access.
//
// Returns the shared, mutex-guarded feature table. The table
// declares one condition (`cd_size: 1`) and no builtins, math
// functions or parameters; every list pointer is `None`.
fn module_features() -> &'static Mutex<features_t> {
    let build = || {
        let table = features_t {
            bn_list: None,
            bn_size: 0,
            cd_list: None,
            cd_size: 1,
            mf_list: None,
            mf_size: 0,
            pd_list: None,
            pd_size: 0,
            n_abstract: 0,
        };
        Mutex::new(table)
    };
    MODULE_FEATURES.get_or_init(build)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Serialises the tests in this module that flip the
    /// process-global `casematch` option. The test harness runs
    /// tests on parallel threads; without this lock one test can
    /// change the option between another test's set and its match
    /// call, producing intermittent failures.
    // NOTE(review): this only guards tests in this module — tests
    // elsewhere in the crate that toggle `casematch` could still
    // interleave; confirm whether a crate-wide lock exists.
    static CASEMATCH_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Runs `zcond_regex_match(&[lhs, pattern], ZREGEX_EXTENDED)`
    /// with `casematch` forced to `value`, restoring the previous
    /// setting (or `false` when none was recorded) afterwards.
    fn match_with_casematch(value: bool, lhs: &str, pattern: &str) -> i32 {
        // A poisoned lock only means another test panicked; the
        // guarded data is `()`, so it is safe to keep going.
        let _guard = CASEMATCH_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let saved = crate::ported::options::opt_state_get("casematch").unwrap_or(false);
        crate::ported::options::opt_state_set("casematch", value);
        let r = zcond_regex_match(&[lhs, pattern], ZREGEX_EXTENDED);
        crate::ported::options::opt_state_set("casematch", saved);
        r
    }

    /// c:54 — a pattern that matches somewhere in the subject yields 1.
    #[test]
    fn match_returns_one() {
        let r = zcond_regex_match(&["hello world", "wor.d"], ZREGEX_EXTENDED);
        assert_eq!(r, 1);
        // Side-effect params (MATCH/MBEGIN/MEND) flow through
        // ksh93::setsparam env-var bridge; they're verified at the
        // integration level (tests/zsh_compat_parity_gaps.rs) rather
        // than here against an in-memory executor map.
    }

    /// c:54 — a pattern with capture groups still reports 1 on match.
    #[test]
    fn captures_returns_one() {
        let r = zcond_regex_match(&["foo=42", "([a-z]+)=([0-9]+)"], ZREGEX_EXTENDED);
        assert_eq!(r, 1);
    }

    /// c:93-94 — REG_NOMATCH maps to 0.
    #[test]
    fn no_match_returns_zero() {
        let r = zcond_regex_match(&["abc", "xyz"], ZREGEX_EXTENDED);
        assert_eq!(r, 0);
    }

    /// c:79-81 — a pattern that fails to compile yields 0.
    #[test]
    fn invalid_pattern_returns_zero() {
        assert_eq!(
            zcond_regex_match(&["anything", "["], ZREGEX_EXTENDED),
            0
        );
    }

    /// Fewer than two operands yields 0 instead of indexing out of
    /// bounds.
    #[test]
    fn missing_args_returns_zero() {
        assert_eq!(zcond_regex_match(&[], ZREGEX_EXTENDED), 0);
        assert_eq!(zcond_regex_match(&["only_lhs"], ZREGEX_EXTENDED), 0);
    }

    #[test]
    fn casematch_set_is_case_sensitive() {
        // c:74-76 — `casematch` flag drives whether REG_ICASE is OR'd
        // into the regcomp flags. `isset(CASEMATCH)` is the C-side
        // gate; the Rust port reads via `optlookup("casematch")`.
        //
        // The zsh C source declares `casematch` with `OPT_ALL`
        // (options.c:106) — defaults to ON in every emulation. C's
        // `createoptiontable` populates that default at shell start;
        // the Rust `OPTS_LIVE` table doesn't (it's initialized empty
        // and grows as the options builtin / startup code sets values).
        // Without an explicit set, `isset(CASEMATCH)` returns false,
        // the test silently wraps `(?i)` around `hello`, and `HELLO`
        // matches — yielding 1 instead of 0.
        //
        // Match the C startup contract: set casematch=true for the
        // call and restore afterwards. Same idiom params.rs tests use
        // for `exec` (8212/8547/9392).
        let r = match_with_casematch(true, "HELLO", "hello");
        assert_eq!(r, 0, "casematch=true → case-sensitive → HELLO vs hello must NOT match");
    }

    /// c:74-76 — same flag, opposite branch. With `casematch=false`
    /// (the user did `unsetopt CASE_MATCH`), the Rust port must wrap
    /// the pattern in `(?i)` so `HELLO` matches `hello`. Pinning the
    /// inverse case prevents a regression that ignores the flag in
    /// both directions.
    #[test]
    fn casematch_unset_is_case_insensitive() {
        let r = match_with_casematch(false, "HELLO", "hello");
        assert_eq!(r, 1, "casematch=false → case-insensitive → HELLO matches hello");
    }

    /// c:54 — `zcond_regex_match` returns 1 when the pattern matches.
    /// Canonical positive case for `[[ string =~ regex ]]`. Regression
    /// returning 0 on a real match would break every `=~` use.
    #[test]
    fn matching_pattern_returns_one() {
        let r = zcond_regex_match(&["hello world", "world"], ZREGEX_EXTENDED);
        assert_eq!(r, 1);
    }

    /// c:54 — `^` requires match-at-start. Regression dropping anchor
    /// semantics would silently accept `[[ "barfoo" =~ ^foo ]]`.
    #[test]
    fn anchor_caret_requires_match_at_start() {
        assert_eq!(zcond_regex_match(&["foobar", "^foo"], ZREGEX_EXTENDED), 1);
        assert_eq!(zcond_regex_match(&["barfoo", "^foo"], ZREGEX_EXTENDED), 0);
    }

    /// c:54 — `$` requires match-at-end.
    #[test]
    fn anchor_dollar_requires_match_at_end() {
        assert_eq!(zcond_regex_match(&["barfoo", "foo$"], ZREGEX_EXTENDED), 1);
        assert_eq!(zcond_regex_match(&["foobar", "foo$"], ZREGEX_EXTENDED), 0);
    }

    /// c:54 — alternation `a|b` matches either branch. Regression
    /// breaking it crashes every theme using `[[ $term =~ xterm|screen ]]`.
    #[test]
    fn alternation_matches_either_branch() {
        assert_eq!(zcond_regex_match(&["xterm",  "xterm|screen"], ZREGEX_EXTENDED), 1);
        assert_eq!(zcond_regex_match(&["screen", "xterm|screen"], ZREGEX_EXTENDED), 1);
        assert_eq!(zcond_regex_match(&["bash",   "xterm|screen"], ZREGEX_EXTENDED), 0);
    }

    /// c:54 — `.` matches any single char (POSIX).
    #[test]
    fn dot_matches_any_single_char() {
        assert_eq!(zcond_regex_match(&["foo", "f.o"], ZREGEX_EXTENDED), 1);
        assert_eq!(zcond_regex_match(&["fXo", "f.o"], ZREGEX_EXTENDED), 1);
    }
}