use crate::ported::utils::{metafy, zstrtol, zwarnnam};
use crate::ported::zsh_h::{OPT_ARG, OPT_HASARG, OPT_ISSET, module, options, isset, MB_CHARLEN, KSHARRAYS, features, MAX_OPS};
use regex::Regex;
use std::sync::{Mutex, OnceLock};
use crate::params::setsparam;
use crate::ported::options::optlookup;
use crate::ported::params::{setaparam, sethparam, setiparam};
/// Constant with value 0. It is not referenced in the visible part of this file.
/// NOTE(review): presumably mirrors the `CPCRE_PLAIN` condition id of the C module — confirm.
pub const CPCRE_PLAIN: i32 = 0;
/// Constant with value 8. It is not referenced in the visible part of this file.
/// NOTE(review): presumably mirrors the `PCRE2_CODE_UNIT_WIDTH` macro of the C module — confirm.
pub const PCRE2_CODE_UNIT_WIDTH: i32 = 8;
// Per-thread slot holding the most recently compiled pattern, or `None` when
// nothing has been compiled. `bin_pcre_compile` clears and refills it;
// `bin_pcre_study` and `bin_pcre_match` only read it.
thread_local! {
static PCRE_PATTERN: std::cell::RefCell<Option<Regex>> = const {
std::cell::RefCell::new(None)
};
}
/// Reports whether matching should treat text as UTF-8.
///
/// Returns 0 when the `multibyte` shell option is off. Otherwise the locale
/// name is taken from the first of `LC_ALL`, `LC_CTYPE`, `LANG` that is set to
/// a non-empty value, and the result is 1 when that name contains `UTF-8` or
/// `UTF8` (case-insensitively), else 0.
#[allow(non_snake_case)]
pub fn zpcre_utf8_enabled() -> i32 {
    if !isset(optlookup("multibyte")) {
        return 0;
    }

    // POSIX treats an empty locale variable as unset, so an empty `LC_ALL`
    // must not hide a meaningful `LC_CTYPE` or `LANG`.
    let locale = ["LC_ALL", "LC_CTYPE", "LANG"]
        .iter()
        .filter_map(|name| std::env::var(name).ok())
        .find(|value| !value.is_empty())
        .unwrap_or_default();

    let upper = locale.to_uppercase();
    if upper.contains("UTF-8") || upper.contains("UTF8") {
        1
    } else {
        0
    }
}
#[allow(unused_variables)]
pub fn bin_pcre_compile(nam: &str, args: &[String], ops: &options, func: i32) -> i32 {
let mut pcre_opts: u32 = 0; let target_len: i32; let target: String;
if OPT_ISSET(ops, b'a') {
pcre_opts |= 1;
} if OPT_ISSET(ops, b'i') {
pcre_opts |= 2;
} if OPT_ISSET(ops, b'm') {
pcre_opts |= 4;
} if OPT_ISSET(ops, b'x') {
pcre_opts |= 8;
} if OPT_ISSET(ops, b's') {
pcre_opts |= 16;
}
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
target = args.first().cloned().unwrap_or_default();
target_len = target.len() as i32;
let _ = target_len;
let mut pattern_str = String::new();
if (pcre_opts & 2) != 0 {
pattern_str.push_str("(?i)");
}
if (pcre_opts & 4) != 0 {
pattern_str.push_str("(?m)");
}
if (pcre_opts & 16) != 0 {
pattern_str.push_str("(?s)");
}
if (pcre_opts & 8) != 0 {
pattern_str.push_str("(?x)");
}
if (pcre_opts & 1) != 0 {
pattern_str.push('^');
}
pattern_str.push_str(&target);
match Regex::new(&pattern_str) {
Ok(re) => {
PCRE_PATTERN.with(|r| *r.borrow_mut() = Some(re));
0 }
Err(e) => {
zwarnnam(nam, &format!("error in regex: {}", e)); 1 }
}
}
/// `pcre_study` builtin: only verifies that a pattern has been compiled.
///
/// Returns 0 when `PCRE_PATTERN` holds a pattern; otherwise warns under `nam`
/// and returns 1. `args`, `ops` and `func` are unused.
#[allow(unused_variables)]
pub fn bin_pcre_study(nam: &str, args: &[String], ops: &options, func: i32) -> i32 {
    let compiled = PCRE_PATTERN.with(|slot| slot.borrow().is_some());
    match compiled {
        true => 0,
        false => {
            zwarnnam(nam, "no pattern has been compiled for study");
            1
        }
    }
}
/// Callout hook placeholder: ignores both pointers and always returns 0.
#[allow(non_snake_case)]
pub fn pcre_callout(
    _block: *mut std::ffi::c_void,
    _callout_data: *mut std::ffi::c_void,
) -> i32 {
    // Neither argument is dereferenced, so null pointers are acceptable.
    let verdict: i32 = 0;
    verdict
}
/// Exports the offsets and text of a match into shell parameters.
///
/// As written, `ovec` (the list of `(start, end)` byte ranges) is created empty
/// and never filled, so the `!ovec.is_empty()` guard is always false: nothing is
/// exported and the function returns 0. The guarded body describes what is
/// exported once offsets are supplied.
///
/// * `pat`, `mdata` — only discarded with `let _ = ...`; never dereferenced.
/// * `arg` — subject text; captures are sliced out of it by byte range.
/// * `captured_count` — exclusive upper bound of the capture indices walked;
///   `captured_count - 1` is used as the number of sub-captures.
/// * `matchvar` — when `Some`, receives the metafied text of range 0.
/// * `substravar` — when `Some`, receives the array of metafied captures.
/// * `namedassoc` — when `Some`, would receive named captures, but the name
///   count is hard-coded to 0 so nothing is stored.
/// * `want_offset_pair` — non-zero sets `ZPCRE_OP` to `"start end"` of range 0.
/// * `matchedinarr` — non-zero makes the capture array start at index 0
///   instead of 1.
/// * `want_begin_end` — non-zero sets `MBEGIN`/`MEND` and the `mbegin`/`mend`
///   arrays.
///
/// Always returns 0.
#[allow(non_snake_case, clippy::too_many_arguments)]
pub fn zpcre_get_substrings(
pat: *mut std::ffi::c_void,
arg: &str,
mdata: *mut std::ffi::c_void,
captured_count: i32,
matchvar: Option<&str>,
substravar: Option<&str>,
namedassoc: Option<&str>,
want_offset_pair: i32,
matchedinarr: i32,
want_begin_end: i32,
) -> i32 {
// First capture index copied into the array: 1 normally, 0 when the whole
// match is to be included.
let mut capture_start: i32 = 1; if matchedinarr != 0 {
capture_start = 0; }
// Offset list; nothing in this function populates it.
let ovec: Vec<(usize, usize)> = Vec::new(); let _ = mdata;
if !ovec.is_empty() {
let nelem = captured_count - 1;
// Raw byte offsets of the whole match, space separated.
if want_offset_pair != 0 {
let offset_all = format!("{} {}", ovec[0].0, ovec[0].1); setsparam("ZPCRE_OP", &offset_all); }
// Whole-match text; a range that does not slice cleanly yields "".
if let Some(mv) = matchvar {
let (s, e) = ovec[0]; let slice = arg.get(s..e).unwrap_or("");
let match_all = metafy(slice); setsparam(mv, &match_all); }
if let Some(sv) = substravar {
// The array is skipped only when begin/end output is wanted and there are
// no sub-captures.
if want_begin_end == 0 || nelem != 0 {
let mut matches: Vec<String> = Vec::with_capacity(
(captured_count + 1 - capture_start) as usize,
);
let mut i = capture_start; while i < captured_count {
// `vec_off / 2` is just `i`; a missing range becomes an empty string.
let vec_off = (2 * i) as usize; if let Some(&(s, e)) = ovec.get(vec_off / 2) {
let slice = arg.get(s..e).unwrap_or("");
matches.push(metafy(slice)); } else {
matches.push(String::new());
}
i += 1;
}
setaparam(sv, matches); }
}
if let Some(na) = namedassoc {
// `ncount` is fixed at 0, so this branch never stores anything.
let _ = pat; let ncount: u32 = 0; if ncount != 0 {
let hash: Vec<String> = Vec::with_capacity(
((ncount + 1) * 2) as usize,
);
sethparam(na, hash); }
}
if want_begin_end != 0 {
// Walk the bytes before the match, adding one to `offs` per MB_CHARLEN step.
// NOTE(review): presumably MB_CHARLEN returns the byte length of the next
// character, making `offs` a character count — confirm. If it ever returned
// 0 the loop would not terminate.
let mut ptr_pos: usize = 0;
let mut offs: i64 = 0; let mut leftlen = ovec[0].0 as i32; while leftlen > 0 {
offs += 1; let clen = {
let slice = arg
.as_bytes()
.get(ptr_pos..ptr_pos + leftlen as usize)
.unwrap_or(&[]);
MB_CHARLEN(slice, slice.len()) };
ptr_pos += clen; leftlen -= clen as i32; }
// With KSHARRAYS set the reported positions are shifted down by one.
let ksharrays = isset(KSHARRAYS) as i64;
setiparam("MBEGIN", offs + 1 - ksharrays);
// Continue the same walk across the matched bytes to find the end position.
let mut leftlen = (ovec[0].1 - ovec[0].0) as i32; while leftlen > 0 {
offs += 1; let clen = {
let slice = arg
.as_bytes()
.get(ptr_pos..ptr_pos + leftlen as usize)
.unwrap_or(&[]);
MB_CHARLEN(slice, slice.len()) };
ptr_pos += clen; leftlen -= clen as i32; }
setiparam(
"MEND",
offs - ksharrays,
);
if nelem != 0 {
let mut mbegin: Vec<String> = Vec::with_capacity(nelem as usize); let mut mend: Vec<String> = Vec::with_capacity(nelem as usize);
for i in 0..nelem as usize {
// Sub-capture ranges start at index 1; a missing range is skipped, so it
// contributes no entry to either array.
let pair_idx = i + 1;
let pair = match ovec.get(pair_idx) {
Some(&p) => p,
None => continue,
};
// Same two-stage walk as above, restarted from the beginning of `arg`
// for every sub-capture.
let mut ptr_pos: usize = 0;
let mut offs: i64 = 0; let mut leftlen = pair.0 as i32; while leftlen > 0 {
offs += 1; let clen = {
let slice = arg
.as_bytes()
.get(ptr_pos..ptr_pos + leftlen as usize)
.unwrap_or(&[]);
MB_CHARLEN(slice, slice.len())
};
ptr_pos += clen; leftlen -= clen as i32; }
let buf = format!("{}", offs + 1 - ksharrays); mbegin.push(buf);
let mut leftlen = (pair.1 - pair.0) as i32; while leftlen > 0 {
offs += 1; let clen = {
let slice = arg
.as_bytes()
.get(ptr_pos..ptr_pos + leftlen as usize)
.unwrap_or(&[]);
MB_CHARLEN(slice, slice.len())
};
ptr_pos += clen; leftlen -= clen as i32; }
let buf = format!("{}", offs - ksharrays); mend.push(buf); }
setaparam("mbegin", mbegin); setaparam("mend", mend); }
}
}
0 }
/// Parses `instr` as a non-negative decimal integer that fits in an `i32`.
///
/// Returns the parsed value on success. When `zstrtol` leaves unparsed text
/// behind, or the value is negative or too large for an `i32`, a warning
/// `integer expected: <instr>` is issued under `nam` and -1 is returned.
#[allow(non_snake_case)]
pub fn getposint(instr: &str, nam: &str) -> i32 {
    let (ret, eptr) = zstrtol(instr, 10);

    // A checked conversion instead of `as i32`: a plain cast would wrap an
    // out-of-range value (e.g. 4294967296 -> 0) into something that looks valid.
    let narrowed: Result<i32, _> = std::convert::TryFrom::try_from(ret);

    match narrowed {
        Ok(value) if eptr.is_empty() && value >= 0 => value,
        _ => {
            zwarnnam(nam, &format!("integer expected: {}", instr));
            -1
        }
    }
}
/// `pcre_match` builtin: runs the pattern stored in `PCRE_PATTERN` against
/// `args[0]` (or the empty string when no argument is given).
///
/// Option handling, in the order it is performed:
/// * no compiled pattern: warn and return 1;
/// * `-d` together with an argument to `-v` or `-A`: warn and return 1;
/// * `-v NAME`: scalar receiving the matched text (default `MATCH`);
/// * `-a NAME`: array receiving the capture groups (default `match`);
/// * `-n OFFSET`: byte offset at which the search starts; an invalid number
///   makes the builtin return 1 after `getposint` has warned.
///
/// `-b` and `-A` are accepted but currently have no effect on the result.
///
/// On a match the scalar is set to the matched text, the array to the capture
/// groups (groups that did not participate become empty strings), and 0 is
/// returned. Otherwise the scalar is set to `""`, the array is emptied, and 1
/// is returned. An offset past the end of the subject, or one that falls
/// inside a multi-byte character, counts as "no match".
pub fn bin_pcre_match(
    nam: &str,
    args: &[String],
    ops: &options,
    _func: i32,
) -> i32 {
    if !PCRE_PATTERN.with(|slot| slot.borrow().is_some()) {
        zwarnnam(nam, "no pattern has been compiled");
        return 1;
    }

    let dfa_requested = OPT_ISSET(ops, b'd');
    if dfa_requested && (OPT_HASARG(ops, b'v') || OPT_HASARG(ops, b'A')) {
        zwarnnam(nam, "-d cannot be combined with -v or -A");
        return 1;
    }
    let matched_portion = if dfa_requested {
        "MATCH"
    } else {
        OPT_ARG(ops, b'v').unwrap_or("MATCH")
    };
    let receptacle = OPT_ARG(ops, b'a').unwrap_or("match");

    let offset_start = if OPT_HASARG(ops, b'n') {
        let parsed = getposint(OPT_ARG(ops, b'n').unwrap_or(""), nam);
        if parsed < 0 {
            return 1;
        }
        // Non-negative by the check above, so the cast cannot wrap.
        parsed as usize
    } else {
        0
    };

    let plaintext = args.first().map(String::as_str).unwrap_or("");

    let outcome = PCRE_PATTERN.with(|slot| -> Option<(String, Vec<String>)> {
        let guard = slot.borrow();
        let re = guard.as_ref()?;
        // `get` instead of indexing: a direct slice panics when the offset
        // lands inside a multi-byte character, which a user can trigger with
        // `-n`. Both that case and an offset past the end yield `None`.
        let search_text = plaintext.get(offset_start..)?;
        let caps = re.captures(search_text)?;
        let whole = caps.get(0)?.as_str().to_string();
        let groups = (1..caps.len())
            .map(|idx| {
                caps.get(idx)
                    .map(|m| m.as_str().to_string())
                    .unwrap_or_default()
            })
            .collect();
        Some((whole, groups))
    });

    match outcome {
        Some((whole, groups)) => {
            crate::ported::params::setsparam(matched_portion, &whole);
            crate::ported::params::setaparam(receptacle, groups);
            0
        }
        None => {
            crate::ported::params::setsparam(matched_portion, "");
            crate::ported::params::setaparam(receptacle, Vec::new());
            1
        }
    }
}
/// `-pcre-match` condition: matches `a[0]` (subject) against `a[1]` (pattern).
///
/// Returns 1 on a match after setting `MATCH` to the matched text and `match`
/// to the capture groups (non-participating groups become empty strings).
/// Returns 0 when fewer than two arguments are given or when the pattern does
/// not compile, leaving the parameters untouched. Returns 0 when the pattern
/// simply does not match, after setting `MATCH` to `""` and emptying `match`.
/// `_id` is unused.
pub fn cond_pcre_match(a: &[String], _id: i32) -> i32 {
    let (subject, pattern) = match a {
        [subject, pattern, ..] => (subject, pattern),
        _ => return 0,
    };

    let re = match Regex::new(pattern) {
        Ok(re) => re,
        Err(_) => return 0,
    };

    let caps = match re.captures(subject) {
        Some(caps) => caps,
        None => {
            crate::ported::params::setsparam("MATCH", "");
            crate::ported::params::setaparam("match", Vec::new());
            return 0;
        }
    };

    if let Some(whole) = caps.get(0) {
        crate::ported::params::setsparam("MATCH", whole.as_str());
    }

    let mut groups: Vec<String> = Vec::with_capacity(caps.len().saturating_sub(1));
    for idx in 1..caps.len() {
        let text = match caps.get(idx) {
            Some(m) => m.as_str().to_string(),
            None => String::new(),
        };
        groups.push(text);
    }
    crate::ported::params::setaparam("match", groups);
    1
}
/// Module setup hook: does nothing with `m` and returns 0.
pub fn setup_(m: *const module) -> i32 {
    // The handle is intentionally unused; it is never dereferenced.
    let _ = m;
    0
}
/// Module features hook: overwrites `features` with the list produced by
/// `featuresarray` for this module's feature table, then returns 0.
pub fn features_(m: *const module, features: &mut Vec<String>) -> i32 {
    let table = module_features();
    let advertised = featuresarray(m, table);
    *features = advertised;
    0
}
/// Module enables hook: forwards `enables` to `handlefeatures` together with
/// this module's feature table and returns its status.
pub fn enables_(m: *const module, enables: &mut Option<Vec<i32>>) -> i32 {
    let table = module_features();
    let status = handlefeatures(m, table, enables);
    status
}
/// Module boot hook: does nothing with `m` and returns 0.
pub fn boot_(m: *const module) -> i32 {
    // The handle is intentionally unused; it is never dereferenced.
    let _ = m;
    0
}
/// Module cleanup hook: calls `setfeatureenables` with this module's feature
/// table and no enable list, and returns its status.
pub fn cleanup_(m: *const module) -> i32 {
    let table = module_features();
    let status = setfeatureenables(m, table, None);
    status
}
/// Module finish hook: does nothing with `m` and returns 0.
pub fn finish_(m: *const module) -> i32 {
    // The handle is intentionally unused; it is never dereferenced.
    let _ = m;
    0
}
// Lazily initialised, process-wide feature table for this module; it is
// filled on first use by `module_features()`.
static MODULE_FEATURES: OnceLock<Mutex<features>> = OnceLock::new();
/// Returns the fixed list of feature names exposed by this module: three
/// `b:`-prefixed entries and one `c:`-prefixed entry. Both arguments are
/// ignored.
fn featuresarray(_m: *const module, _f: &Mutex<features>) -> Vec<String> {
    const NAMES: [&str; 4] = [
        "b:pcre_compile",
        "b:pcre_match",
        "b:pcre_study",
        "c:pcre-match",
    ];
    NAMES.iter().map(|&name| String::from(name)).collect()
}
/// Fills `enables` with four `1` entries when it is `None`; an existing list
/// is left untouched. `_m` and `_f` are ignored. Always returns 0.
fn handlefeatures(
    _m: *const module,
    _f: &Mutex<features>,
    enables: &mut Option<Vec<i32>>,
) -> i32 {
    // One entry per name returned by `featuresarray`.
    let _ = enables.get_or_insert_with(|| vec![1; 4]);
    0
}
/// No-op stand-in: ignores all three arguments and returns 0.
fn setfeatureenables(_m: *const module, _f: &Mutex<features>, _e: Option<&[i32]>) -> i32 {
    let status: i32 = 0;
    status
}
/// Returns the shared feature table, creating it on first use.
///
/// The table is built with sizes 3 (`bn_size`) and 1 (`cd_size`), zero for the
/// remaining sizes and `n_abstract`, and every list set to `None`.
fn module_features() -> &'static Mutex<features> {
    /// Builds the initial table stored in `MODULE_FEATURES`.
    fn build() -> Mutex<features> {
        let table = features {
            bn_list: None,
            bn_size: 3,
            cd_list: None,
            cd_size: 1,
            mf_list: None,
            mf_size: 0,
            pd_list: None,
            pd_size: 0,
            n_abstract: 0,
        };
        Mutex::new(table)
    }

    MODULE_FEATURES.get_or_init(build)
}
// Unit tests for the builtins, the condition and the helper functions above.
// Every test first takes `crate::test_util::global_state_lock()`.
// NOTE(review): presumably that lock serialises tests touching shared shell
// state (parameters, options) — confirm against `test_util`.
#[cfg(test)]
mod tests {
use super::*;
// Builds an `options` value with no option letters set and no arguments.
fn empty_ops() -> options {
options {
ind: [0u8; MAX_OPS],
args: Vec::new(),
argscount: 0,
argsalloc: 0,
}
}
// Builds an `options` value with `ind[c]` set to 1 for each listed letter.
fn ops_with(flags: &[u8]) -> options {
let mut o = empty_ops();
for &c in flags {
o.ind[c as usize] = 1;
}
o
}
// Shorthand for `&str` -> `String`.
fn s(x: &str) -> String {
x.to_string()
}
// A valid pattern compiles (status 0) and is stored in PCRE_PATTERN.
#[test]
fn test_pcre_compile_simple() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let ops = empty_ops();
assert_eq!(bin_pcre_compile("pcre_compile", &[s("hello")], &ops, 0), 0);
assert!(PCRE_PATTERN.with(|r| r.borrow().is_some()));
}
// An unterminated character class is rejected with status 1.
#[test]
fn test_pcre_compile_invalid() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let ops = empty_ops();
assert_eq!(
bin_pcre_compile("pcre_compile", &[s("[invalid")], &ops, 0),
1
);
}
// With `-i`, a lowercase pattern matches uppercase text and MATCH holds the
// text as it appears in the subject.
#[test]
fn test_pcre_compile_caseless() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let ops = ops_with(&[b'i']);
assert_eq!(bin_pcre_compile("pcre_compile", &[s("hello")], &ops, 0), 0);
let status = bin_pcre_match("pcre_match", &[s("HELLO WORLD")], &empty_ops(), 0);
assert_eq!(status, 0);
assert_eq!(
crate::ported::params::getsparam("MATCH").as_deref(),
Some("HELLO"),
"c:405 — setsparam(MATCH, matched_portion)"
);
}
// Studying without a compiled pattern fails with status 1.
#[test]
fn test_pcre_study_no_pattern() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
assert_eq!(bin_pcre_study("pcre_study", &[], &empty_ops(), 0), 1);
}
// Studying after a successful compile succeeds with status 0.
#[test]
fn test_pcre_study_with_pattern() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let ops = empty_ops();
bin_pcre_compile("pcre_compile", &[s("hello")], &ops, 0);
assert_eq!(bin_pcre_study("pcre_study", &[], &ops, 0), 0);
}
// A plain match returns 0 and sets MATCH to the matched text.
#[test]
fn test_pcre_match_simple() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
bin_pcre_compile("pcre_compile", &[s("hello")], &empty_ops(), 0);
let status = bin_pcre_match("pcre_match", &[s("hello world")], &empty_ops(), 0);
assert_eq!(status, 0);
assert_eq!(
crate::ported::params::getsparam("MATCH").as_deref(),
Some("hello")
);
}
// A subject that does not contain the pattern yields status 1.
#[test]
fn test_pcre_match_no_match() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
bin_pcre_compile("pcre_compile", &[s("hello")], &empty_ops(), 0);
let status = bin_pcre_match("pcre_match", &[s("goodbye world")], &empty_ops(), 0);
assert_eq!(status, 1);
}
// Capture groups land in the `match` array in group order.
#[test]
fn test_pcre_match_captures() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
bin_pcre_compile("pcre_compile", &[s(r"(\w+) (\w+)")], &empty_ops(), 0);
let status = bin_pcre_match("pcre_match", &[s("hello world")], &empty_ops(), 0);
assert_eq!(status, 0);
let caps = crate::ported::params::getaparam("match").unwrap_or_default();
assert_eq!(caps.len(), 2);
assert_eq!(caps[0], "hello");
assert_eq!(caps[1], "world");
}
// The condition is case-sensitive unless the pattern carries `(?i)`.
#[test]
fn test_cond_pcre_match() {
let _g = crate::test_util::global_state_lock();
let m = cond_pcre_match(&[s("hello world"), s("hello")], 0);
assert_eq!(m, 1);
let m = cond_pcre_match(&[s("hello world"), s("(?i)HELLO")], 0);
assert_eq!(m, 1);
let m = cond_pcre_match(&[s("hello world"), s("HELLO")], 0);
assert_eq!(m, 0);
}
// Despite the name, this passes one argument ("["), which fails to compile.
#[test]
fn test_builtin_pcre_compile_no_args() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
assert_eq!(
bin_pcre_compile("pcre_compile", &[s("[")], &empty_ops(), 0),
1
);
}
// Matching without a compiled pattern fails with status 1.
#[test]
fn test_builtin_pcre_match_no_pattern() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let status = bin_pcre_match("pcre_match", &[s("test")], &empty_ops(), 0);
assert_eq!(status, 1);
}
// getposint: a plain decimal number is returned as-is.
#[test]
fn getposint_parses_positive_decimal() {
let _g = crate::test_util::global_state_lock();
let r = getposint("42", "test");
assert_eq!(r, 42);
}
// getposint: zero is accepted.
#[test]
fn getposint_zero_is_valid() {
let _g = crate::test_util::global_state_lock();
let r = getposint("0", "test");
assert_eq!(r, 0);
}
// getposint: non-numeric text yields a negative result.
#[test]
fn getposint_non_numeric_returns_negative() {
let _g = crate::test_util::global_state_lock();
let r = getposint("abc", "test");
assert!(
r < 0,
"non-numeric must return negative sentinel, got {}",
r
);
}
// With no arguments the empty pattern is compiled and status is 0.
#[test]
fn bin_pcre_compile_no_args_compiles_empty_pattern() {
let _g = crate::test_util::global_state_lock();
PCRE_PATTERN.with(|r| *r.borrow_mut() = None);
let r = bin_pcre_compile("pcre_compile", &[], &empty_ops(), 0);
assert_eq!(r, 0, "C body has no arity check; empty pattern compiles");
}
// With pattern "x" and no subject argument the empty subject does not match.
#[test]
fn bin_pcre_match_no_args_returns_one() {
let _g = crate::test_util::global_state_lock();
bin_pcre_compile("pcre_compile", &[s("x")], &empty_ops(), 0);
let status = bin_pcre_match("pcre_match", &[], &empty_ops(), 0);
assert_eq!(status, 1, "no subject must surface as error");
}
// The condition returns 0 for a pattern that fails to compile.
#[test]
fn cond_pcre_match_malformed_pattern_returns_no_match() {
let _g = crate::test_util::global_state_lock();
let m = cond_pcre_match(&[s("anything"), s("[")], 0);
assert_eq!(m, 0, "malformed regex must fail-soft to no-match");
}
// `^` in the pattern anchors at the start of the subject.
#[test]
fn cond_pcre_match_caret_anchor_requires_start() {
let _g = crate::test_util::global_state_lock();
let m = cond_pcre_match(&[s("foo bar"), s("^foo")], 0);
assert_eq!(m, 1, "caret matches at start");
let m = cond_pcre_match(&[s("bar foo"), s("^foo")], 0);
assert_eq!(m, 0, "caret must NOT match mid-string");
}
// setup_, boot_, cleanup_ and finish_ all return 0, even for a null handle.
#[test]
fn module_lifecycle_shims_all_return_zero() {
let _g = crate::test_util::global_state_lock();
let m: *const module = std::ptr::null();
assert_eq!(setup_(m), 0);
assert_eq!(boot_(m), 0);
assert_eq!(cleanup_(m), 0);
assert_eq!(finish_(m), 0);
}
// getposint: digits followed by other characters are rejected.
#[test]
fn getposint_rejects_trailing_garbage() {
let _g = crate::test_util::global_state_lock();
assert_eq!(
getposint("42abc", "test"),
-1,
"c:317 — *eptr='a' truthy → error"
);
assert_eq!(
getposint("100x", "test"),
-1,
"c:317 — trailing non-digit must reject"
);
}
// getposint: trailing whitespace is rejected as well.
#[test]
fn getposint_rejects_trailing_whitespace() {
let _g = crate::test_util::global_state_lock();
assert_eq!(
getposint("42 ", "test"),
-1,
"c:317 — trailing space → *eptr=' ' → error"
);
assert_eq!(
getposint("42\t", "test"),
-1,
"c:317 — trailing tab → *eptr='\\t' → error"
);
}
// getposint: leading whitespace is accepted.
#[test]
fn getposint_skips_leading_whitespace() {
let _g = crate::test_util::global_state_lock();
assert_eq!(
getposint(" 42", "test"),
42,
"c:312 — zstrtol skips leading whitespace"
);
}
// getposint: negative numbers are rejected with -1.
#[test]
fn getposint_rejects_negative() {
let _g = crate::test_util::global_state_lock();
assert_eq!(
getposint("-1", "test"),
-1,
"c:317 — `ret < 0` branch fires for negative input"
);
assert_eq!(getposint("-100", "test"), -1);
}
// getposint: the empty string parses as 0 without an error.
#[test]
fn getposint_empty_input_returns_zero() {
let _g = crate::test_util::global_state_lock();
assert_eq!(
getposint("", "test"),
0,
"c:312-325 — empty input → 0 (no error)"
);
}
// Condition: character classes with quantifiers match.
#[test]
fn pcre_corpus_cond_match_charclass_quantifier() {
let _g = crate::test_util::global_state_lock();
let r = cond_pcre_match(
&["abc123".to_string(), "[a-z]+[0-9]+".to_string()], 0);
assert_eq!(r, 1, "regex match succeeds");
}
// Condition: a digit pattern does not match a subject without digits.
#[test]
fn pcre_corpus_cond_match_no_digits() {
let _g = crate::test_util::global_state_lock();
let r = cond_pcre_match(
&["xyz".to_string(), "[0-9]+".to_string()], 0);
assert_eq!(r, 0, "no digits in 'xyz' = false");
}
// Condition: the empty pattern matches the empty subject.
#[test]
fn pcre_corpus_cond_match_empty_pattern_matches_empty() {
let _g = crate::test_util::global_state_lock();
let r = cond_pcre_match(
&["".to_string(), "".to_string()], 0);
assert_eq!(r, 1);
}
// Condition: a successful match stores the whole match in MATCH.
#[test]
fn pcre_corpus_cond_match_sets_MATCH() {
let _g = crate::test_util::global_state_lock();
let _ = cond_pcre_match(
&["abc123".to_string(), "[a-z]+[0-9]+".to_string()], 0);
assert_eq!(
crate::ported::params::getsparam("MATCH").as_deref(),
Some("abc123"),
"$MATCH = whole-match",
);
}
// Condition: capture groups are stored in the `match` array.
#[test]
fn pcre_corpus_cond_match_sets_match_array() {
let _g = crate::test_util::global_state_lock();
let _ = cond_pcre_match(
&["abc123".to_string(), "([a-z]+)([0-9]+)".to_string()], 0);
let m = crate::ported::params::getaparam("match");
assert_eq!(
m.as_deref(),
Some(&["abc".to_string(), "123".to_string()][..]),
"$match[1..N] populated from capture groups",
);
}
// Condition: fewer than two arguments returns 0.
#[test]
fn pcre_corpus_cond_match_one_arg_returns_zero() {
let _g = crate::test_util::global_state_lock();
let r = cond_pcre_match(&["only".to_string()], 0);
assert_eq!(r, 0);
}
// Condition: an invalid pattern returns 0.
#[test]
fn pcre_corpus_cond_match_invalid_pattern_returns_zero() {
let _g = crate::test_util::global_state_lock();
let r = cond_pcre_match(
&["abc".to_string(), "[unterminated".to_string()], 0);
assert_eq!(r, 0, "invalid pattern = no match");
}
}