use regex::Regex;
use crate::ported::utils::zwarnnam;
use crate::ported::zsh_h::options;
use crate::ported::zsh_h::OPT_ISSET;
use crate::ported::zsh_h::{OPT_ARG, OPT_HASARG};
/// Identifier for the plain `pcre-match` condition.
/// NOTE(review): not referenced elsewhere in this file; presumably the value
/// passed as `_id` to `cond_pcre_match` — confirm against the caller.
pub const CPCRE_PLAIN: i32 = 0;
/// Code-unit width constant carried over from the PCRE2 build configuration.
/// NOTE(review): unused by the `regex`-based code in this file — presumably
/// kept only for parity with the C source; confirm.
pub const PCRE2_CODE_UNIT_WIDTH: i32 = 8;
// Per-thread slot holding the most recently compiled pattern.
// `bin_pcre_compile` clears and refills it; `bin_pcre_study` and
// `bin_pcre_match` read it. Starts out as `None`.
thread_local! {
static PCRE_PATTERN: std::cell::RefCell<Option<Regex>> = const {
std::cell::RefCell::new(None)
};
}
/// Reports whether patterns should be treated as UTF-8.
///
/// Returns `0` when the `multibyte` shell option is off. Otherwise inspects the
/// effective character-type locale taken from the environment (`LC_ALL`, then
/// `LC_CTYPE`, then `LANG`) and returns `1` if its name mentions `UTF-8` or
/// `UTF8` (case-insensitively), `0` if not.
///
/// A variable that is set but empty is skipped, matching the POSIX rule that a
/// null `LC_*`/`LANG` value behaves as if the variable were unset; previously
/// an empty `LC_ALL` hid a perfectly good `LANG=en_US.UTF-8`.
#[allow(non_snake_case)]
pub fn zpcre_utf8_enabled() -> i32 {
    let multibyte =
        crate::ported::zsh_h::isset(crate::ported::options::optlookup("multibyte"));
    if !multibyte {
        return 0;
    }

    // First variable in precedence order that is set, valid Unicode, and non-empty.
    let locale = ["LC_ALL", "LC_CTYPE", "LANG"]
        .iter()
        .find_map(|name| std::env::var(*name).ok().filter(|value| !value.is_empty()))
        .unwrap_or_default();

    // Upper-case once so both spellings can be checked case-insensitively.
    let locale = locale.to_uppercase();
    i32::from(locale.contains("UTF-8") || locale.contains("UTF8"))
}
#[allow(unused_variables)]
pub fn bin_pcre_compile(nam: &str, args: &[String], ops: &options, func: i32) -> i32 { let mut pcre_opts: u32 = 0; let target_len: i32; let target: String;
if OPT_ISSET(ops, b'a') { pcre_opts |= 1; } if OPT_ISSET(ops, b'i') { pcre_opts |= 2; } if OPT_ISSET(ops, b'm') { pcre_opts |= 4; } if OPT_ISSET(ops, b'x') { pcre_opts |= 8; } if OPT_ISSET(ops, b's') { pcre_opts |= 16; }
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
target = args.first().cloned().unwrap_or_default();
target_len = target.len() as i32;
let _ = target_len;
let mut pattern_str = String::new();
if (pcre_opts & 2) != 0 { pattern_str.push_str("(?i)"); }
if (pcre_opts & 4) != 0 { pattern_str.push_str("(?m)"); }
if (pcre_opts & 16) != 0 { pattern_str.push_str("(?s)"); }
if (pcre_opts & 8) != 0 { pattern_str.push_str("(?x)"); }
if (pcre_opts & 1) != 0 { pattern_str.push('^'); }
pattern_str.push_str(&target);
match Regex::new(&pattern_str) {
Ok(re) => {
PCRE_PATTERN.with(|r| *r.borrow_mut() = Some(re));
0 }
Err(e) => {
zwarnnam(nam, &format!("error in regex: {}", e)); 1 }
}
}
/// Builtin `pcre_study`: succeeds only if a pattern has already been compiled.
///
/// Performs no work on the pattern itself. Returns `0` when `PCRE_PATTERN`
/// holds a pattern; otherwise warns through `zwarnnam` and returns `1`.
#[allow(unused_variables)]
pub fn bin_pcre_study(nam: &str, args: &[String], ops: &options, func: i32) -> i32 {
    let compiled = PCRE_PATTERN.with(|slot| slot.borrow().is_some());
    if compiled {
        return 0;
    }
    zwarnnam(nam, "no pattern has been compiled for study");
    1
}
/// Callout hook placeholder: ignores both pointers and always returns `0`.
#[allow(non_snake_case)]
pub fn pcre_callout(
    _block: *mut std::ffi::c_void,
    _callout_data: *mut std::ffi::c_void,
) -> i32 {
    0
}
use crate::ported::zsh_h::module;
/// Publishes the results of a match into shell parameters.
///
/// NOTE: in this port the offset vector `ovec` is created empty and never
/// filled (`mdata` is ignored), so the guarded body below never executes and
/// the function currently always returns `0` without setting any parameter.
/// The comments inside describe what the body would do once `ovec` is
/// populated with `(start, end)` byte offsets into `arg`.
///
/// Parameters:
/// * `pat` — compiled pattern handle; only touched by the (currently inert)
///   named-capture branch.
/// * `arg` — the subject string the offsets refer to.
/// * `mdata` — match data handle; ignored.
/// * `captured_count` — number of offset pairs, including the whole match.
/// * `matchvar` — if set, parameter receiving the whole match.
/// * `substravar` — if set, array parameter receiving the captures.
/// * `namedassoc` — if set, associative parameter for named captures.
/// * `want_offset_pair` — non-zero to set `ZPCRE_OP` to `"start end"`.
/// * `matchedinarr` — non-zero to include the whole match in the array.
/// * `want_begin_end` — non-zero to set `MBEGIN`/`MEND`/`mbegin`/`mend`.
///
/// Always returns `0`.
#[allow(non_snake_case, clippy::too_many_arguments)]
pub fn zpcre_get_substrings( pat: *mut std::ffi::c_void,
arg: &str,
mdata: *mut std::ffi::c_void,
captured_count: i32,
matchvar: Option<&str>,
substravar: Option<&str>,
namedassoc: Option<&str>,
want_offset_pair: i32,
matchedinarr: i32,
want_begin_end: i32,
) -> i32 {
use crate::ported::params::{setsparam};
// Captures normally start at group 1; group 0 (the whole match) is included
// only when `matchedinarr` is set.
let mut capture_start: i32 = 1; if matchedinarr != 0 { capture_start = 0; }
// Offset pairs for each group. Never populated here, which makes everything
// inside the following `if` unreachable for now.
let ovec: Vec<(usize, usize)> = Vec::new(); let _ = mdata;
// `nelem` is the number of capture groups excluding the whole match.
if !ovec.is_empty() { let nelem = captured_count - 1;
// ZPCRE_OP <- "start end" byte offsets of the whole match.
if want_offset_pair != 0 { let offset_all = format!("{} {}", ovec[0].0, ovec[0].1); setsparam("ZPCRE_OP", &offset_all); }
// Whole match text (empty if the offsets are not a valid slice of `arg`),
// passed through `metafy` before being stored.
if let Some(mv) = matchvar { let (s, e) = ovec[0]; let slice = arg.get(s..e).unwrap_or("");
let match_all = crate::ported::utils::metafy(slice); setsparam(mv, &match_all); }
// Capture array: skipped only when begin/end reporting is requested and
// there are no capture groups.
if let Some(sv) = substravar { if want_begin_end == 0 || nelem != 0 { let mut matches: Vec<String> = Vec::with_capacity( (captured_count + 1 - capture_start) as usize,
);
let mut i = capture_start; while i < captured_count {
// `vec_off / 2` is just `i`: the pair index for group `i`.
let vec_off = (2 * i) as usize; if let Some(&(s, e)) = ovec.get(vec_off / 2) {
let slice = arg.get(s..e).unwrap_or("");
matches.push(crate::ported::utils::metafy(slice)); } else {
// No offset pair recorded for this group: store an empty string.
matches.push(String::new());
}
i += 1;
}
crate::ported::params::setaparam(sv, matches); }
}
// Named captures: `ncount` is hard-coded to 0, so `sethparam` is never called.
if let Some(na) = namedassoc { let _ = pat; let ncount: u32 = 0; if ncount != 0 { let hash: Vec<String> = Vec::with_capacity( ((ncount + 1) * 2) as usize,
);
crate::ported::params::sethparam(na, hash); }
}
// MBEGIN/MEND: convert the whole match's byte offsets into character
// positions by stepping through `arg` one `MB_CHARLEN` at a time.
// NOTE(review): presumably MB_CHARLEN returns the byte length of the next
// character; if it can return 0 the `while leftlen > 0` loops would not
// terminate — confirm.
if want_begin_end != 0 { let mut ptr_pos: usize = 0;
// First loop: count the characters before the match start.
let mut offs: i64 = 0; let mut leftlen = ovec[0].0 as i32; while leftlen > 0 { offs += 1; let clen = {
let slice = arg.as_bytes().get(ptr_pos..ptr_pos + leftlen as usize)
.unwrap_or(&[]);
crate::ported::zsh_h::MB_CHARLEN(slice, slice.len()) };
ptr_pos += clen; leftlen -= clen as i32; }
// With KSHARRAYS set, reported positions are shifted down by one.
let ksharrays = crate::ported::zsh_h::isset(
crate::ported::zsh_h::KSHARRAYS) as i64;
crate::ported::params::setiparam("MBEGIN", offs + 1 - ksharrays);
// Second loop: keep counting through the matched text itself.
let mut leftlen = (ovec[0].1 - ovec[0].0) as i32; while leftlen > 0 { offs += 1; let clen = {
let slice = arg.as_bytes().get(ptr_pos..ptr_pos + leftlen as usize)
.unwrap_or(&[]);
crate::ported::zsh_h::MB_CHARLEN(slice, slice.len()) };
ptr_pos += clen; leftlen -= clen as i32; }
crate::ported::params::setiparam( "MEND", offs - ksharrays,
);
// Same computation per capture group, collected into the `mbegin`/`mend`
// arrays. Groups with no offset pair are skipped (no entry is pushed).
if nelem != 0 { let mut mbegin: Vec<String> = Vec::with_capacity(nelem as usize); let mut mend: Vec<String> = Vec::with_capacity(nelem as usize);
for i in 0..nelem as usize { let pair_idx = i + 1;
let pair = match ovec.get(pair_idx) {
Some(&p) => p,
None => continue,
};
// Counting restarts from the beginning of `arg` for every group.
let mut ptr_pos: usize = 0;
let mut offs: i64 = 0; let mut leftlen = pair.0 as i32; while leftlen > 0 { offs += 1; let clen = {
let slice = arg.as_bytes().get(ptr_pos..ptr_pos + leftlen as usize)
.unwrap_or(&[]);
crate::ported::zsh_h::MB_CHARLEN(slice, slice.len()) };
ptr_pos += clen; leftlen -= clen as i32; }
let buf = format!("{}", offs + 1 - ksharrays); mbegin.push(buf);
let mut leftlen = (pair.1 - pair.0) as i32; while leftlen > 0 { offs += 1; let clen = {
let slice = arg.as_bytes().get(ptr_pos..ptr_pos + leftlen as usize)
.unwrap_or(&[]);
crate::ported::zsh_h::MB_CHARLEN(slice, slice.len()) };
ptr_pos += clen; leftlen -= clen as i32; }
let buf = format!("{}", offs - ksharrays); mend.push(buf); }
crate::ported::params::setaparam("mbegin", mbegin); crate::ported::params::setaparam("mend", mend); }
}
}
0 }
/// Parses `instr` as a non-negative base-10 integer.
///
/// Delegates the numeric parsing to `zstrtol`. If anything is left over after
/// the number, or the value (narrowed to `i32`) is negative, warns with
/// `integer expected: <instr>` under the name `nam` and returns `-1`.
/// Otherwise returns the parsed value.
#[allow(non_snake_case)]
pub fn getposint(instr: &str, nam: &str) -> i32 {
    let (parsed, rest) = crate::ported::utils::zstrtol(instr, 10);
    let value = parsed as i32;
    if rest.is_empty() && value >= 0 {
        return value;
    }
    crate::ported::utils::zwarnnam(nam, &format!("integer expected: {}", instr));
    -1
}
/// Builtin `pcre_match`: matches `args[0]` against the pattern stored by
/// `bin_pcre_compile`.
///
/// Returns `(status, whole_match, captures)`:
/// * `status` is `0` on a match and `1` on no match or on any error;
/// * `whole_match` is the text of the overall match, if any;
/// * `captures` holds one entry per capture group (group 1 onwards), `None`
///   for groups that did not participate.
///
/// Options consulted in `ops`:
/// * `-n N` — start matching at byte offset `N` of the subject. An offset at or
///   past the end of a non-empty subject is a plain no-match; an offset that
///   falls inside a multibyte character is reported as an error.
/// * `-d` — rejected (with a warning) when combined with an argument-bearing
///   `-v` or `-A`.
///
/// The `-v`, `-A`, `-a` and `-b` options select output parameters; this
/// function does not store into parameters itself, so they are not read here.
///
/// A missing subject argument yields status `1`. An explicitly empty subject
/// is matched normally, so patterns that can match the empty string succeed.
pub fn bin_pcre_match(nam: &str, args: &[String], ops: &options, _func: i32) -> (i32, Option<String>, Vec<Option<String>>) {
    let failure = || -> (i32, Option<String>, Vec<Option<String>>) { (1, None, Vec::new()) };

    if !PCRE_PATTERN.with(|slot| slot.borrow().is_some()) {
        zwarnnam(nam, "no pattern has been compiled");
        return failure();
    }

    if OPT_ISSET(ops, b'd') && (OPT_HASARG(ops, b'v') || OPT_HASARG(ops, b'A')) {
        zwarnnam(nam, "-d cannot be combined with -v or -A");
        return failure();
    }

    // `getposint` has already warned when it returns its negative sentinel.
    let offset_start = if OPT_HASARG(ops, b'n') {
        let Ok(offset) = usize::try_from(getposint(OPT_ARG(ops, b'n').unwrap_or(""), nam))
        else {
            return failure();
        };
        offset
    } else {
        0
    };

    let Some(subject) = args.first() else {
        return failure();
    };

    // Only a *positive* offset at or beyond the end short-circuits to no-match;
    // offset 0 on an empty subject must still be handed to the matcher.
    if offset_start > 0 && offset_start >= subject.len() {
        return failure();
    }

    // Slicing (rather than searching from an index) keeps `-a` anchoring
    // relative to the requested offset. `get` refuses offsets that are not on
    // a character boundary instead of panicking.
    let Some(search_text) = subject.get(offset_start..) else {
        zwarnnam(
            nam,
            &format!("offset {} is not on a character boundary", offset_start),
        );
        return failure();
    };

    let (full_match, captures) =
        PCRE_PATTERN.with(|slot| -> (Option<String>, Vec<Option<String>>) {
            let guard = slot.borrow();
            let Some(caps) = guard.as_ref().and_then(|re| re.captures(search_text)) else {
                return (None, Vec::new());
            };
            let whole = caps.get(0).map(|m| m.as_str().to_string());
            let groups = (1..caps.len())
                .map(|i| caps.get(i).map(|m| m.as_str().to_string()))
                .collect();
            (whole, groups)
        });

    let status = i32::from(full_match.is_none());
    (status, full_match, captures)
}
/// Condition `pcre-match`: tests `a[0]` (subject) against `a[1]` (pattern).
///
/// The pattern is compiled on every call and is independent of the pattern
/// stored by `bin_pcre_compile`. Returns `(matched, whole_match, captures)`
/// where `matched` is `1` on a match and `0` otherwise. Fewer than two
/// arguments or a pattern that fails to compile are both treated as a silent
/// no-match. `_id` is ignored.
pub fn cond_pcre_match(a: &[String], _id: i32) -> (i32, Option<String>, Vec<Option<String>>) {
    let miss = || -> (i32, Option<String>, Vec<Option<String>>) { (0, None, Vec::new()) };

    let (Some(subject), Some(pattern)) = (a.first(), a.get(1)) else {
        return miss();
    };
    let Ok(re) = Regex::new(pattern) else {
        return miss();
    };
    let Some(caps) = re.captures(subject) else {
        return miss();
    };

    let whole = caps.get(0).map(|m| m.as_str().to_string());
    let groups: Vec<Option<String>> = (1..caps.len())
        .map(|idx| caps.get(idx).map(|m| m.as_str().to_string()))
        .collect();
    (1, whole, groups)
}
/// Module setup hook; does nothing and reports success (`0`).
#[allow(unused_variables)]
pub fn setup_(m: *const module) -> i32 {
    0
}
/// Module features hook: overwrites `features` with the list produced by
/// `featuresarray` for this module's feature table, then returns `0`.
pub fn features_(m: *const module, features: &mut Vec<String>) -> i32 {
    let listed = featuresarray(m, module_features());
    *features = listed;
    0
}
/// Module enables hook: forwards to `handlefeatures` with this module's
/// feature table and returns its status.
pub fn enables_(m: *const module, enables: &mut Option<Vec<i32>>) -> i32 {
    let table = module_features();
    handlefeatures(m, table, enables)
}
/// Module boot hook; does nothing and reports success (`0`).
#[allow(unused_variables)]
pub fn boot_(m: *const module) -> i32 {
    0
}
/// Module cleanup hook: calls `setfeatureenables` with no enable list for this
/// module's feature table and returns its status.
pub fn cleanup_(m: *const module) -> i32 {
    let table = module_features();
    setfeatureenables(m, table, None)
}
/// Module finish hook; does nothing and reports success (`0`).
#[allow(unused_variables)]
pub fn finish_(m: *const module) -> i32 {
    0
}
use crate::ported::zsh_h::features as features_t;
use std::sync::{Mutex, OnceLock};
// Lazily-initialised, process-wide feature table for this module; filled in on
// first use by `module_features`.
static MODULE_FEATURES: OnceLock<Mutex<features_t>> = OnceLock::new();
/// Returns the fixed list of feature names this module exposes: three builtins
/// (`b:` prefix) and one condition (`c:` prefix). Both arguments are ignored.
fn featuresarray(_m: *const module, _f: &Mutex<features_t>) -> Vec<String> {
    const NAMES: [&str; 4] = [
        "b:pcre_compile",
        "b:pcre_match",
        "b:pcre_study",
        "c:pcre-match",
    ];
    NAMES.iter().map(|name| name.to_string()).collect()
}
/// Fills `enables` with four `1` entries when the caller supplied no list;
/// an existing list is left untouched. Always returns `0`. The module and
/// feature-table arguments are ignored.
fn handlefeatures(
    _m: *const module,
    _f: &Mutex<features_t>,
    enables: &mut Option<Vec<i32>>,
) -> i32 {
    let _ = enables.get_or_insert_with(|| vec![1; 4]);
    0
}
/// Placeholder for applying a feature-enable list: ignores every argument and
/// returns `0`.
fn setfeatureenables(_m: *const module, _f: &Mutex<features_t>, _e: Option<&[i32]>) -> i32 {
    0
}
/// Returns the shared feature table, building it on first access.
///
/// The table records sizes only (3 builtins, 1 condition, no math functions or
/// parameters); every list pointer is left as `None`.
fn module_features() -> &'static Mutex<features_t> {
    fn build() -> Mutex<features_t> {
        let table = features_t {
            bn_list: None,
            bn_size: 3,
            cd_list: None,
            cd_size: 1,
            mf_list: None,
            mf_size: 0,
            pd_list: None,
            pd_size: 0,
            n_abstract: 0,
        };
        Mutex::new(table)
    }
    MODULE_FEATURES.get_or_init(build)
}
// Unit tests for the builtins, the condition, `getposint`, and the module
// lifecycle hooks. Most tests that touch the compiled pattern reset the
// thread-local `PCRE_PATTERN` slot first so they do not depend on each other.
#[cfg(test)]
mod tests {
use super::*;
use crate::ported::zsh_h::MAX_OPS;
// Option set with no flags and no arguments.
fn empty_ops() -> options {
options { ind: [0u8; MAX_OPS], args: Vec::new(), argscount: 0, argsalloc: 0 }
}
// Option set with each listed flag letter marked in `ind`.
fn ops_with(flags: &[u8]) -> options {
let mut o = empty_ops();
for &c in flags { o.ind[c as usize] = 1; }
o
}
// Shorthand for building an owned argument string.
fn s(x: &str) -> String { x.to_string() }
// A valid literal pattern compiles and is stored.
#[test]
fn test_pcre_compile_simple() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let ops = empty_ops();
assert_eq!(bin_pcre_compile("pcre_compile", &[s("hello")], &ops, 0), 0);
assert!(PCRE_PATTERN.with(|r| r.borrow().is_some()));
}
// An unterminated character class is rejected with status 1.
#[test]
fn test_pcre_compile_invalid() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let ops = empty_ops();
assert_eq!(bin_pcre_compile("pcre_compile", &[s("[invalid")], &ops, 0), 1);
}
// `-i` makes the stored pattern match regardless of case.
#[test]
fn test_pcre_compile_caseless() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let ops = ops_with(&[b'i']);
assert_eq!(bin_pcre_compile("pcre_compile", &[s("hello")], &ops, 0), 0);
let (status, full, _) = bin_pcre_match("pcre_match", &[s("HELLO WORLD")], &empty_ops(), 0);
assert_eq!(status, 0);
assert_eq!(full.as_deref(), Some("HELLO"));
}
// `pcre_study` fails when nothing has been compiled.
#[test]
fn test_pcre_study_no_pattern() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
assert_eq!(bin_pcre_study("pcre_study", &[], &empty_ops(), 0), 1);
}
// `pcre_study` succeeds once a pattern exists.
#[test]
fn test_pcre_study_with_pattern() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let ops = empty_ops();
bin_pcre_compile("pcre_compile", &[s("hello")], &ops, 0);
assert_eq!(bin_pcre_study("pcre_study", &[], &ops, 0), 0);
}
// A literal pattern finds its text inside a longer subject.
#[test]
fn test_pcre_match_simple() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
bin_pcre_compile("pcre_compile", &[s("hello")], &empty_ops(), 0);
let (status, full, _) = bin_pcre_match("pcre_match", &[s("hello world")], &empty_ops(), 0);
assert_eq!(status, 0);
assert_eq!(full.as_deref(), Some("hello"));
}
// A subject that does not contain the pattern yields status 1.
#[test]
fn test_pcre_match_no_match() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
bin_pcre_compile("pcre_compile", &[s("hello")], &empty_ops(), 0);
let (status, _, _) = bin_pcre_match("pcre_match", &[s("goodbye world")], &empty_ops(), 0);
assert_eq!(status, 1);
}
// Capture groups are returned in order, starting from group 1.
#[test]
fn test_pcre_match_captures() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
bin_pcre_compile("pcre_compile", &[s(r"(\w+) (\w+)")], &empty_ops(), 0);
let (status, _, caps) = bin_pcre_match("pcre_match", &[s("hello world")], &empty_ops(), 0);
assert_eq!(status, 0);
assert_eq!(caps.len(), 2);
assert_eq!(caps[0].as_deref(), Some("hello"));
assert_eq!(caps[1].as_deref(), Some("world"));
}
// The condition is case-sensitive unless the pattern itself says otherwise.
#[test]
fn test_cond_pcre_match() {
let (m, _, _) = cond_pcre_match(&[s("hello world"), s("hello")], 0);
assert_eq!(m, 1);
let (m, _, _) = cond_pcre_match(&[s("hello world"), s("(?i)HELLO")], 0);
assert_eq!(m, 1);
let (m, _, _) = cond_pcre_match(&[s("hello world"), s("HELLO")], 0);
assert_eq!(m, 0);
}
// Despite the name, this passes one argument: the invalid pattern "[",
// which must be rejected.
#[test]
fn test_builtin_pcre_compile_no_args() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
assert_eq!(bin_pcre_compile("pcre_compile", &[s("[")], &empty_ops(), 0), 1);
}
// Matching without a compiled pattern is an error.
#[test]
fn test_builtin_pcre_match_no_pattern() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let (status, _, _) = bin_pcre_match("pcre_match", &[s("test")], &empty_ops(), 0);
assert_eq!(status, 1);
}
// Plain decimal digits parse to their value.
#[test]
fn getposint_parses_positive_decimal() {
let r = getposint("42", "test");
assert_eq!(r, 42);
}
// Zero is accepted as a valid non-negative integer.
#[test]
fn getposint_zero_is_valid() {
let r = getposint("0", "test");
assert_eq!(r, 0);
}
// Input with no digits at all is rejected.
#[test]
fn getposint_non_numeric_returns_negative() {
let r = getposint("abc", "test");
assert!(r < 0, "non-numeric must return negative sentinel, got {}", r);
}
// With no arguments the empty pattern is compiled successfully.
#[test]
fn bin_pcre_compile_no_args_compiles_empty_pattern() {
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let r = bin_pcre_compile("pcre_compile", &[], &empty_ops(), 0);
assert_eq!(r, 0, "C body has no arity check; empty pattern compiles");
}
// With a pattern compiled but no subject argument, matching reports 1.
#[test]
fn bin_pcre_match_no_args_returns_one() {
bin_pcre_compile("pcre_compile", &[s("x")], &empty_ops(), 0);
let (status, _, _) = bin_pcre_match("pcre_match", &[], &empty_ops(), 0);
assert_eq!(status, 1, "no subject must surface as error");
}
// A pattern that fails to compile makes the condition false, not a panic.
#[test]
fn cond_pcre_match_malformed_pattern_returns_no_match() {
let (m, _, _) = cond_pcre_match(&[s("anything"), s("[")], 0);
assert_eq!(m, 0, "malformed regex must fail-soft to no-match");
}
// `^` in a condition pattern only matches at the start of the subject.
#[test]
fn cond_pcre_match_caret_anchor_requires_start() {
let (m, _, _) = cond_pcre_match(&[s("foo bar"), s("^foo")], 0);
assert_eq!(m, 1, "caret matches at start");
let (m, _, _) = cond_pcre_match(&[s("bar foo"), s("^foo")], 0);
assert_eq!(m, 0, "caret must NOT match mid-string");
}
// All lifecycle hooks accept a null module pointer and return 0.
#[test]
fn module_lifecycle_shims_all_return_zero() {
let m: *const module = std::ptr::null();
assert_eq!(setup_(m), 0);
assert_eq!(boot_(m), 0);
assert_eq!(cleanup_(m), 0);
assert_eq!(finish_(m), 0);
}
// Anything left over after the digits is an error.
#[test]
fn getposint_rejects_trailing_garbage() {
assert_eq!(getposint("42abc", "test"), -1,
"c:317 — *eptr='a' truthy → error");
assert_eq!(getposint("100x", "test"), -1,
"c:317 — trailing non-digit must reject");
}
// Trailing whitespace counts as leftover input and is rejected.
#[test]
fn getposint_rejects_trailing_whitespace() {
assert_eq!(getposint("42 ", "test"), -1,
"c:317 — trailing space → *eptr=' ' → error");
assert_eq!(getposint("42\t", "test"), -1,
"c:317 — trailing tab → *eptr='\\t' → error");
}
// Leading whitespace, by contrast, is accepted.
#[test]
fn getposint_skips_leading_whitespace() {
assert_eq!(getposint(" 42", "test"), 42,
"c:312 — zstrtol skips leading whitespace");
}
// Negative values are rejected with the -1 sentinel.
#[test]
fn getposint_rejects_negative() {
assert_eq!(getposint("-1", "test"), -1,
"c:317 — `ret < 0` branch fires for negative input");
assert_eq!(getposint("-100", "test"), -1);
}
// Empty input parses as 0 without an error.
#[test]
fn getposint_empty_input_returns_zero() {
assert_eq!(getposint("", "test"), 0,
"c:312-325 — empty input → 0 (no error)");
}
}