#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
use std::sync::Mutex;
use std::sync::atomic::{AtomicI32, AtomicUsize, Ordering};
// Bytecode opcodes stored in the first byte of every compiled pattern node.
// The numeric grouping is relied on by the predicates defined right after this
// line: bit 0x20 marks branch-type opcodes, 0x30 marks exclusion opcodes and
// bit 0x40 is what `P_NOTDOT` tests. `P_OPEN + n` / `P_CLOSE + n` encode the
// start/end of capture group `n`.
// NOTE(review): values presumably mirror the opcode table of zsh's
// Src/pattern.c — confirm before renumbering anything.
pub const P_END: u8 = 0x00; pub const P_EXCSYNC: u8 = 0x01; pub const P_EXCEND: u8 = 0x02; pub const P_BACK: u8 = 0x03; pub const P_EXACTLY: u8 = 0x04; pub const P_NOTHING: u8 = 0x05; pub const P_ONEHASH: u8 = 0x06; pub const P_TWOHASH: u8 = 0x07; pub const P_GFLAGS: u8 = 0x08; pub const P_ISSTART: u8 = 0x09; pub const P_ISEND: u8 = 0x0a; pub const P_COUNTSTART: u8 = 0x0b; pub const P_COUNT: u8 = 0x0c; pub const P_BRANCH: u8 = 0x20; pub const P_WBRANCH: u8 = 0x21; pub const P_EXCLUDE: u8 = 0x30; pub const P_EXCLUDP: u8 = 0x31; pub const P_ANY: u8 = 0x40; pub const P_ANYOF: u8 = 0x41; pub const P_ANYBUT: u8 = 0x42; pub const P_STAR: u8 = 0x43; pub const P_NUMRNG: u8 = 0x44; pub const P_NUMFROM: u8 = 0x45; pub const P_NUMTO: u8 = 0x46; pub const P_NUMANY: u8 = 0x47; pub const P_OPEN: u8 = 0x80; pub const P_CLOSE: u8 = 0x90;
/// Reports whether `op` has the branch bit (`0x20`) set.
#[inline]
pub fn P_ISBRANCH(op: u8) -> bool {
    const BRANCH_BIT: u8 = 0x20;
    op & BRANCH_BIT == BRANCH_BIT
}
/// Reports whether both bits of the exclusion mask (`0x30`) are set in `op`.
#[inline]
pub fn P_ISEXCLUDE(op: u8) -> bool {
    const EXCLUDE_MASK: u8 = 0x30;
    let masked = op & EXCLUDE_MASK;
    masked == EXCLUDE_MASK
}
/// Reports whether bit `0x40` is set in `op` (the bit shared by the
/// `P_ANY`..`P_NUMANY` opcode group).
#[inline]
pub fn P_NOTDOT(op: u8) -> bool {
    (op >> 6) & 1 == 1
}
// Bit flags reported through the `flagp` out-parameters of the compile
// functions in this file: `patcomppiece` sets `P_SIMPLE` and clears
// `P_PURESTR`, `patcompbranch` starts from `P_PURESTR`, and `patcompswitch`
// propagates `P_HSTART`.
pub const P_SIMPLE: i32 = 0x01; pub const P_HSTART: i32 = 0x02; pub const P_PURESTR: i32 = 0x04;
/// A compiled pattern: the `patprog` header paired with the bytecode buffer
/// (`patcompile` stores the emitted code as the second tuple element).
#[allow(non_camel_case_types)]
pub type Patprog = Box<(patprog, Vec<u8>)>;
pub use crate::ported::zsh_h::{
PAT_HEAPDUP, PAT_FILE, PAT_FILET, PAT_ANY, PAT_NOANCH, PAT_NOGLD,
PAT_PURES, PAT_STATIC, PAT_SCAN, PAT_ZDUP, PAT_NOTSTART, PAT_NOTEND,
PAT_HAS_EXCLUDP, PAT_LCMATCHUC,
};
/// Length of the capture arrays in `rpat`; `patcomppiece` rejects a
/// parenthesised group once the group counter reaches this value.
pub const NSUBEXP: usize = 9;
pub use crate::ported::zsh_h::{
GF_LCMATCHUC, GF_IGNCASE, GF_BACKREF, GF_MATCHREF, GF_MULTIBYTE,
};
use crate::zsh_h::{patprog, ZPC_BAR, ZPC_BNULLKEEP, ZPC_COUNT, ZPC_HASH, ZPC_HAT, ZPC_INANG, ZPC_INBRACK, ZPC_INPAR, ZPC_NULL, ZPC_OUTPAR, ZPC_QUEST, ZPC_SLASH, ZPC_STAR, ZPC_TILDE};
// Node layout inside the bytecode buffer: one opcode byte at `I_OP`, a
// 4-byte little-endian "next" offset at `I_NEXT`, and the operand/body
// starting at `I_BODY`.
const I_OP: usize = 0; const I_NEXT: usize = 1; const I_BODY: usize = 5;
// Bytecode output buffer; `patnode`/`patadd`/`patinsert` write into it and
// `patcompile` clones it into the finished program.
pub static patout: Mutex<Vec<u8>> = Mutex::new(Vec::new());
// Held for the whole of `patcompile` so compilations do not interleave on
// the shared statics below.
static PATCOMPILE_LOCK: Mutex<()> = Mutex::new(());
// `patparse` is the pattern text being parsed; `patstart` receives a copy of
// the same text at the start of `patcompile`.
pub static patparse: Mutex<String> = Mutex::new(String::new()); pub static patstart: Mutex<String> = Mutex::new(String::new());
// Byte offset of the parse cursor inside `patparse`.
pub static patparse_off: AtomicUsize = AtomicUsize::new(0);
// Number handed to the next capture group; reset to 1 by `patcompstart`.
pub static patnpar: AtomicI32 = AtomicI32::new(0);
// Compile flags (set from `inflags` in `patcompile`).
pub static patflags: AtomicI32 = AtomicI32::new(0);
// Current glob flags; reported as `globend` in the compiled header.
pub static patglobflags: AtomicI32 = AtomicI32::new(0);
// Reset by `patcompstart`; not otherwise touched in the code visible here.
pub static errsfound: AtomicI32 = AtomicI32::new(0);
// Reset to -1 by `patcompstart`; not otherwise touched in the code visible here.
pub static forceerrs: AtomicI32 = AtomicI32::new(-1);
// NOTE(review): no reader or writer is visible in this part of the file.
pub static patglobflags_orig: AtomicI32 = AtomicI32::new(0);
// Table of special pattern characters indexed by the `ZPC_*` constants;
// filled by `patcompcharsset`.
pub static zpc_special: Mutex<[u8; ZPC_COUNT as usize]> = Mutex::new([0u8; ZPC_COUNT as usize]);
// NOTE(review): no reader or writer is visible in this part of the file.
pub static patstrcache: Mutex<String> = Mutex::new(String::new());
// NOTE(review): unused in the code visible here; purpose to be confirmed.
pub const Marker: u8 = 0x80;
/// Appends raw bytes to the bytecode buffer and returns the offset at which
/// the appended data starts.
///
/// With `add = Some(bytes)` at most `n` bytes of `bytes` are copied (a
/// negative `n` is cast to `usize` and therefore clamps to the whole slice).
/// With `add = None` the byte `ch` is appended `n` times (nothing when `n`
/// is not positive). `paflags` is currently ignored.
#[allow(unused_variables)]
fn patadd(add: Option<&[u8]>, ch: u8, n: i64, paflags: i32) -> i64 {
    let mut out = patout.lock().unwrap();
    let start = out.len() as i64;
    match add {
        Some(bytes) => {
            let take = (n as usize).min(bytes.len());
            out.extend_from_slice(&bytes[..take]);
        }
        None => {
            let grown = out.len() + n.max(0) as usize;
            out.resize(grown, ch);
        }
    }
    start
}
/// Emits a fresh node header (opcode byte plus a zeroed 4-byte next offset)
/// at the end of the bytecode buffer and returns the node's offset.
fn patnode(op: u8) -> usize {
    let mut out = patout.lock().unwrap();
    let node_off = out.len();
    out.extend_from_slice(&[op, 0, 0, 0, 0]);
    node_off
}
/// Inserts a new node with opcode `op` in front of the code at offset `opnd`.
///
/// The node consists of the 5-byte header followed by `sz` body bytes; the
/// body is zero-filled and then overwritten with up to `sz` bytes of `xtra`.
/// After the insertion, next-offsets pointing at or beyond `opnd` are
/// shifted by the inserted length.
fn patinsert(op: u8, opnd: usize, xtra: Option<&[u8]>, sz: usize) {
    const HEADER_LEN: usize = 1 + 4;
    let mut out = patout.lock().unwrap();
    let node_len = HEADER_LEN + sz;

    let mut node = Vec::with_capacity(node_len);
    node.push(op);
    node.resize(node_len, 0u8);
    if let Some(extra) = xtra {
        let n = sz.min(extra.len());
        node[HEADER_LEN..HEADER_LEN + n].copy_from_slice(&extra[..n]);
    }

    out.splice(opnd..opnd, node);
    fixup_offsets_after_insert(&mut out, opnd, node_len as u32);
}
/// Walks the node stream in `buf` and adds `delta` to every non-zero next
/// offset that lies in `opnd..=buf.len()`.
///
/// Positions whose opcode byte is zero are skipped one byte at a time; all
/// other positions are stepped over with `advance_past_instr`, and the walk
/// stops as soon as that helper reports 0.
fn fixup_offsets_after_insert(buf: &mut [u8], opnd: usize, delta: u32) {
    let mut pos = 0;
    while pos + I_BODY <= buf.len() {
        if buf[pos + I_OP] == 0 {
            pos += 1;
            continue;
        }

        let field = pos + I_NEXT;
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&buf[field..field + 4]);
        let target = u32::from_le_bytes(raw);

        let needs_shift = target != 0 && (opnd..=buf.len()).contains(&(target as usize));
        if needs_shift {
            let shifted = target + delta;
            buf[field..field + 4].copy_from_slice(&shifted.to_le_bytes());
        }

        pos = advance_past_instr(buf, pos);
        if pos == 0 {
            break;
        }
    }
}
fn advance_past_instr(buf: &[u8], pos: usize) -> usize {
if pos + I_BODY > buf.len() { return 0; }
let op = buf[pos + I_OP];
let body_start = pos + I_BODY;
match op {
P_END | P_NOTHING | P_BACK | P_EXCSYNC | P_EXCEND
| P_ISSTART | P_ISEND | P_COUNTSTART | P_ANY | P_STAR | P_NUMANY
=> body_start,
P_GFLAGS => body_start + 4, P_EXACTLY => {
if body_start + 4 > buf.len() { return 0; }
let len = u32::from_le_bytes(buf[body_start..body_start + 4].try_into().unwrap()) as usize;
body_start + 4 + len
}
P_ANYOF | P_ANYBUT => {
if body_start + 4 > buf.len() { return 0; }
let len = u32::from_le_bytes(buf[body_start..body_start + 4].try_into().unwrap()) as usize;
body_start + 4 + len
}
P_ONEHASH | P_TWOHASH | P_BRANCH | P_WBRANCH
| P_EXCLUDE | P_EXCLUDP => body_start,
P_OPEN..=0x88 | P_CLOSE..=0x98 => body_start,
P_NUMRNG => body_start + 16, P_NUMFROM | P_NUMTO => body_start + 8,
P_COUNT => body_start + 16, _ => body_start,
}
}
/// Overwrites the next-offset field of the node at `pos` with `val`
/// (truncated to `u32`, little-endian). Does nothing when the field would
/// fall outside the bytecode buffer.
fn set_next(pos: usize, val: usize) {
    let mut out = patout.lock().unwrap();
    let field = pos + I_NEXT;
    if let Some(slot) = out.get_mut(field..field + 4) {
        slot.copy_from_slice(&(val as u32).to_le_bytes());
    }
}
/// Follows the next-offset chain starting at node `p` until a node whose
/// next offset is 0, then points that node at `val`.
///
/// Returns without writing if the walk reaches a position whose header does
/// not fit in the buffer.
fn pattail(p: usize, val: usize) {
    let mut out = patout.lock().unwrap();
    let mut node = p;
    loop {
        if node + I_BODY > out.len() {
            return;
        }
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&out[node + I_NEXT..node + I_NEXT + 4]);
        match u32::from_le_bytes(raw) as usize {
            0 => break,
            following => node = following,
        }
    }
    let field = node + I_NEXT;
    if let Some(slot) = out.get_mut(field..field + 4) {
        slot.copy_from_slice(&(val as u32).to_le_bytes());
    }
}
/// Like `pattail`, but only acts when the node at `p` is a branch-type
/// opcode, in which case the chain starting at the node's operand
/// (`p + I_BODY`) is terminated with `val`.
fn patoptail(p: usize, val: usize) {
    // The buffer lock is a temporary of this statement, so it is released
    // before `pattail` locks the buffer again.
    let op = match patout.lock().unwrap().get(p + I_OP) {
        Some(&byte) => byte,
        None => return,
    };
    if P_ISBRANCH(op) {
        pattail(p + I_BODY, val);
    }
}
/// Resets the `zpc_special` table: every slot is cleared and the slots for
/// the individual `ZPC_*` indices are then filled with their pattern
/// character (`ZPC_NULL` and `ZPC_BNULLKEEP` stay 0).
pub fn patcompcharsset() {
    // Kept in the original assignment order so a later entry wins should two
    // indices ever coincide.
    let assignments: [(usize, u8); 13] = [
        (ZPC_SLASH as usize, b'/'),
        (ZPC_NULL as usize, 0),
        (ZPC_BAR as usize, b'|'),
        (ZPC_OUTPAR as usize, b')'),
        (ZPC_TILDE as usize, b'~'),
        (ZPC_INPAR as usize, b'('),
        (ZPC_QUEST as usize, b'?'),
        (ZPC_STAR as usize, b'*'),
        (ZPC_INBRACK as usize, b'['),
        (ZPC_INANG as usize, b'<'),
        (ZPC_HAT as usize, b'^'),
        (ZPC_HASH as usize, b'#'),
        (ZPC_BNULLKEEP as usize, 0),
    ];
    let mut table = zpc_special.lock().unwrap();
    *table = [0u8; ZPC_COUNT as usize];
    for &(slot, ch) in assignments.iter() {
        table[slot] = ch;
    }
}
/// Resets the shared compiler state before a new compilation: empties the
/// bytecode buffer, restores the counters/flags to their initial values,
/// rewinds the parse cursor and reloads the special-character table.
pub fn patcompstart() {
    patout.lock().unwrap().clear();

    let initial_values = [
        (&patnpar, 1),
        (&patflags, 0),
        (&patglobflags, GF_MULTIBYTE),
        (&errsfound, 0),
        (&forceerrs, -1),
    ];
    for &(cell, value) in initial_values.iter() {
        cell.store(value, Ordering::Relaxed);
    }

    patparse_off.store(0, Ordering::Relaxed);
    patcompcharsset();
}
/// Compiles the pattern text `exp` into a `Patprog`.
///
/// * `exp` – pattern source.
/// * `inflags` – `PAT_*` compile flags; `PAT_PURES` and `PAT_HAS_EXCLUDP`
///   are masked out before being stored.
/// * `endexp` – when supplied, receives the part of the pattern that was not
///   consumed by the parser.
///
/// Returns `None` when `patcompswitch` reports a parse failure.
///
/// The function serialises itself through `PATCOMPILE_LOCK` because all
/// parser state lives in module-level statics.
pub fn patcompile(exp: &str, inflags: i32, mut endexp: Option<&mut String>) -> Option<Patprog>
{
// A poisoned lock is recovered rather than propagated: the guarded data is
// `()` and the statics are re-initialised right below.
let _compile_guard = PATCOMPILE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
patcompstart();
*patstart.lock().unwrap() = exp.to_string();
*patparse.lock().unwrap() = exp.to_string();
patflags.store(inflags & !(PAT_PURES | PAT_HAS_EXCLUDP) as i32, Ordering::Relaxed); patglobflags.store(0, Ordering::Relaxed);
// Leading `(#...)` flag groups are consumed here and folded into the
// program-wide glob flags instead of being emitted as bytecode.
let mut hoisted_globflags: i32 = GF_MULTIBYTE;
loop {
let off = patparse_off.load(Ordering::Relaxed);
let p = patparse.lock().unwrap();
if off + 1 >= p.len() || &p.as_bytes()[off..off + 2] != b"(#" {
break;
}
let rest = p[off..].to_string();
drop(p);
match patgetglobflags(&rest) {
Some((bits, _assert, consumed)) => {
hoisted_globflags |= bits & (GF_IGNCASE | GF_LCMATCHUC | GF_MULTIBYTE
| GF_BACKREF | GF_MATCHREF);
// `patgetglobflags` reports only the final bit set, so the presence of a
// `U` anywhere in the remaining text is used to detect a multibyte-off
// request.
// NOTE(review): `rest` is the whole remaining pattern, not just the flag
// group — a literal `U` later in the pattern also triggers this; confirm
// that is intended.
if (bits & GF_MULTIBYTE) == 0 && rest.contains('U') {
hoisted_globflags &= !GF_MULTIBYTE;
}
patparse_off.fetch_add(consumed, Ordering::Relaxed);
}
None => break,
}
}
let mut flagp: i32 = 0;
let root = patcompswitch(0, &mut flagp);
if root < 0 {
return None; }
// Terminate the program and point the tail of every top-level branch at
// the end node.
let end_off = patnode(P_END);
chain_branches_to(root as usize, end_off);
let code = patout.lock().unwrap().clone();
let consumed_off = patparse_off.load(Ordering::Relaxed);
if let Some(end) = endexp.as_deref_mut() {
let parse = patparse.lock().unwrap();
*end = parse[consumed_off..].to_string();
}
// NOTE(review): `flags` ORs the PAT_* compile flags with the GF_* glob
// flags; the matcher entry points pass `flags` on as glob flags, so the two
// bit spaces must not overlap — confirm against the constant definitions.
Some(Box::new((
patprog {
startoff: 0,
size: code.len() as i64,
mustoff: 0,
patmlen: 0,
globflags: hoisted_globflags,
globend: patglobflags.load(Ordering::Relaxed),
flags: patflags.load(Ordering::Relaxed) | hoisted_globflags,
// The counter starts at 1, so subtracting 1 gives the number of groups.
patnpar: patnpar.load(Ordering::Relaxed) - 1,
patstartch: 0,
},
code,
)))
}
/// Compiles a `|`-separated list of alternatives starting at the current
/// parse offset.
///
/// A `P_BRANCH` node is emitted in front of every alternative and each
/// branch node's next offset is pointed at the following branch node. The
/// `P_HSTART` bit of every alternative's flags is ORed into `*flagp`.
///
/// Returns the offset of the first branch node, or -1 if any alternative
/// fails to compile. `paren` is only forwarded to `patcompbranch`.
pub fn patcompswitch(paren: i32, flagp: &mut i32) -> i64 { let starter = patnode(P_BRANCH);
let mut branch_flags: i32 = 0;
let first_branch = patcompbranch(&mut branch_flags, paren);
if first_branch < 0 { return -1; }
*flagp |= branch_flags & P_HSTART;
let mut last_branch = starter;
loop {
let off = patparse_off.load(Ordering::Relaxed);
let parse = patparse.lock().unwrap();
if off >= parse.len() { break; }
let c = parse.as_bytes()[off];
if c != b'|' { break; }
// Release the parse lock before calling back into the compiler, which
// locks `patparse` itself.
drop(parse);
patparse_off.fetch_add(1, Ordering::Relaxed);
let br = patnode(P_BRANCH);
set_next(last_branch, br);
let mut bf: i32 = 0;
let inner = patcompbranch(&mut bf, paren);
if inner < 0 { return -1; }
*flagp |= bf & P_HSTART;
last_branch = br;
}
// The branch body is emitted directly after its P_BRANCH header, so its
// offset is not needed here.
let _ = first_branch;
starter as i64
}
/// For every node reachable from `starter` through next offsets, terminates
/// the chain that begins at the node's operand (`node + I_BODY`) with
/// `target`.
fn chain_branches_to(starter: usize, target: usize) {
    let mut branch = starter;
    loop {
        pattail(branch + I_BODY, target);

        // Read the link with the buffer locked only for the duration of the
        // read; `pattail` takes the same lock.
        let link = {
            let out = patout.lock().unwrap();
            let field = branch + I_NEXT;
            if field + 4 > out.len() {
                return;
            }
            let mut raw = [0u8; 4];
            raw.copy_from_slice(&out[field..field + 4]);
            u32::from_le_bytes(raw) as usize
        };

        if link == 0 {
            return;
        }
        branch = link;
    }
}
/// Compiles one alternative: a sequence of pieces up to the end of the
/// pattern, a `|` or a `)`.
///
/// Besides ordinary pieces (delegated to `patcomppiece`) two inline forms
/// are recognised: `(#cN)`/`(#cN,M)` repetition counts, emitted as a
/// `P_COUNT` node followed by the next piece, and `(#...)` glob-flag groups,
/// emitted as `P_GFLAGS` and/or an assertion opcode. Consecutive nodes are
/// linked through their next offsets.
///
/// `*flagp` is initialised to `P_PURESTR` and ANDed with the flags of each
/// ordinary piece. Returns the offset of the first node (a `P_NOTHING` node
/// for an empty alternative) or -1 on failure.
pub fn patcompbranch(flagp: &mut i32, paren: i32) -> i64 { let mut chain_start: i64 = -1;
let mut last_tail: usize = 0;
*flagp = P_PURESTR;
loop {
let off = patparse_off.load(Ordering::Relaxed);
// Work on a private copy of the pattern bytes so no lock is held while
// the nested compile functions run.
let snapshot: Vec<u8> = {
let parse = patparse.lock().unwrap();
parse.as_bytes().to_vec()
};
if off >= snapshot.len() { break; }
let c = snapshot[off];
if c == b'|' || c == b')' { break; }
let bytes = snapshot.as_slice();
// `(#cN)` / `(#cN,M)` repetition count.
// NOTE(review): the count is applied to the piece FOLLOWING the flag; the
// zsh manual describes `(#cN,M)` as applying to the preceding pattern —
// confirm this is intended.
if off + 2 < bytes.len() && bytes[off] == b'(' && bytes[off + 1] == b'#'
&& bytes[off + 2] == b'c'
{
let mut j = off + 3;
let mut min: i64 = 0;
let min_start = j;
// NOTE(review): `min * 10` (and `mx * 10` below) can overflow i64 on very
// long digit runs.
while j < bytes.len() && bytes[j].is_ascii_digit() {
min = min * 10 + (bytes[j] - b'0') as i64;
j += 1;
}
let mut max: i64 = i64::MAX;
if j > min_start {
if j < bytes.len() && bytes[j] == b',' {
j += 1;
let max_start = j;
let mut mx: i64 = 0;
while j < bytes.len() && bytes[j].is_ascii_digit() {
mx = mx * 10 + (bytes[j] - b'0') as i64;
j += 1;
}
// `(#cN,)` leaves the maximum unbounded.
if j > max_start { max = mx; }
} else {
// `(#cN)` means exactly N.
max = min;
}
if j < bytes.len() && bytes[j] == b')' {
j += 1;
patparse_off.store(j, Ordering::Relaxed);
// P_COUNT body: min and max as two little-endian i64 values; the counted
// piece is emitted directly behind them.
let count_off = patnode(P_COUNT);
let mut buf = patout.lock().unwrap();
buf.extend_from_slice(&min.to_le_bytes());
buf.extend_from_slice(&max.to_le_bytes());
drop(buf);
let mut piece_flags: i32 = 0;
let mut piece_tail: usize = 0;
let piece = patcomppiece(&mut piece_flags, paren, &mut piece_tail);
if piece < 0 { return -1; }
// The counted operand must end with next == 0 so the matcher stops after
// one repetition.
set_next(piece_tail, 0);
if chain_start < 0 { chain_start = count_off as i64; }
else { set_next(last_tail, count_off); }
last_tail = count_off;
continue;
}
}
}
// Inline glob-flag group such as `(#i)` or `(#s)`.
if off + 1 < bytes.len() && bytes[off] == b'(' && bytes[off + 1] == b'#' {
let rest = std::str::from_utf8(&bytes[off..]).unwrap_or("").to_string();
if let Some((bits, assertp, consumed)) = patgetglobflags(&rest) {
patparse_off.fetch_add(consumed, Ordering::Relaxed);
let flag_bits = bits & (GF_IGNCASE | GF_LCMATCHUC | GF_MULTIBYTE);
// A P_GFLAGS node is emitted unless the group was a pure assertion with
// no flag bits.
if flag_bits != 0 || (flag_bits == 0 && assertp == 0) {
let gf_off = patnode(P_GFLAGS);
let mut buf = patout.lock().unwrap();
buf.extend_from_slice(&flag_bits.to_le_bytes());
drop(buf);
if chain_start < 0 { chain_start = gf_off as i64; }
else { set_next(last_tail, gf_off); }
last_tail = gf_off;
}
if assertp != 0 {
// `assertp` carries the assertion opcode (P_ISSTART / P_ISEND).
let as_off = patnode(assertp as u8);
if chain_start < 0 { chain_start = as_off as i64; }
else { set_next(last_tail, as_off); }
last_tail = as_off;
}
continue;
}
}
// Ordinary piece.
let mut piece_flags: i32 = 0;
let mut piece_tail: usize = 0;
let piece = patcomppiece(&mut piece_flags, paren, &mut piece_tail);
if piece < 0 { return -1; }
if chain_start < 0 {
chain_start = piece;
} else {
set_next(last_tail, piece as usize);
}
last_tail = piece_tail;
*flagp &= piece_flags;
}
if chain_start < 0 {
chain_start = patnode(P_NOTHING) as i64;
}
chain_start
}
pub fn patcomppiece(flagp: &mut i32, paren: i32, tail_out: &mut usize) -> i64 { let _ = paren;
let off = patparse_off.load(Ordering::Relaxed);
let parse = patparse.lock().unwrap();
if off >= parse.len() {
return patnode(P_NOTHING) as i64;
}
let bytes = parse.as_bytes();
let c = bytes[off];
drop(parse);
let atom = match c {
b'?' => {
patparse_off.fetch_add(1, Ordering::Relaxed);
*flagp |= P_SIMPLE;
*flagp &= !P_PURESTR;
let h = patnode(P_ANY);
*tail_out = h;
h as i64
}
b'*' => {
patparse_off.fetch_add(1, Ordering::Relaxed);
*flagp &= !P_PURESTR;
let h = patnode(P_STAR);
*tail_out = h;
h as i64
}
b'[' => {
patparse_off.fetch_add(1, Ordering::Relaxed);
*flagp |= P_SIMPLE;
*flagp &= !P_PURESTR;
let mut chars: Vec<u8> = Vec::new();
let mut negate = false;
let bracket_start = patparse_off.load(Ordering::Relaxed);
let parse_b = patparse.lock().unwrap();
let bb = parse_b.as_bytes();
let mut i_b = bracket_start;
if i_b < bb.len() && (bb[i_b] == b'^' || bb[i_b] == b'!') {
negate = true;
i_b += 1;
}
while i_b < bb.len() && bb[i_b] != b']' {
if i_b + 1 < bb.len() && bb[i_b] == b'[' && bb[i_b+1] == b':' {
let class_start = i_b + 2;
let mut j_b = class_start;
while j_b + 1 < bb.len() && !(bb[j_b] == b':' && bb[j_b+1] == b']') {
j_b += 1;
}
if j_b + 1 < bb.len() {
let class_name = std::str::from_utf8(&bb[class_start..j_b]).unwrap_or("");
match class_name {
"alpha" => { for c in b'a'..=b'z' { chars.push(c); } for c in b'A'..=b'Z' { chars.push(c); } }
"upper" => { for c in b'A'..=b'Z' { chars.push(c); } }
"lower" => { for c in b'a'..=b'z' { chars.push(c); } }
"digit" => { for c in b'0'..=b'9' { chars.push(c); } }
"xdigit" => { for c in b'0'..=b'9' { chars.push(c); } for c in b'a'..=b'f' { chars.push(c); } for c in b'A'..=b'F' { chars.push(c); } }
"alnum" => { for c in b'a'..=b'z' { chars.push(c); } for c in b'A'..=b'Z' { chars.push(c); } for c in b'0'..=b'9' { chars.push(c); } }
"space" => { for b in b" \t\n\r\x0b\x0c".iter() { chars.push(*b); } }
"blank" => { chars.push(b' '); chars.push(b'\t'); }
"punct" => { for b in b"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~".iter() { chars.push(*b); } }
"cntrl" => { for c in 0u8..=31 { chars.push(c); } chars.push(127); }
"print" => { for c in 32u8..=126 { chars.push(c); } }
"graph" => { for c in 33u8..=126 { chars.push(c); } }
_ => {}
}
i_b = j_b + 2;
continue;
}
}
if i_b + 2 < bb.len() && bb[i_b+1] == b'-' && bb[i_b+2] != b']' {
let lo = bb[i_b];
let hi = bb[i_b+2];
for c in lo..=hi { chars.push(c); }
i_b += 3;
} else {
chars.push(bb[i_b]);
i_b += 1;
}
}
drop(parse_b);
if let Some(p_lock) = patparse.lock().ok() {
if i_b < p_lock.len() && p_lock.as_bytes()[i_b] == b']' { i_b += 1; }
}
patparse_off.store(i_b, Ordering::Relaxed);
let opcode = if negate { P_ANYBUT } else { P_ANYOF };
let off2 = patnode(opcode);
let mut buf = patout.lock().unwrap();
let len = chars.len() as u32;
buf.extend_from_slice(&len.to_le_bytes());
buf.extend_from_slice(&chars);
*tail_out = off2;
off2 as i64
}
b'(' => {
patparse_off.fetch_add(1, Ordering::Relaxed);
*flagp &= !P_PURESTR;
let n = patnpar.fetch_add(1, Ordering::Relaxed);
if n >= NSUBEXP as i32 {
return -1;
}
let opcode = P_OPEN + n as u8;
let open_off = patnode(opcode);
let mut inner_flags: i32 = 0;
let inner = patcompswitch(1, &mut inner_flags);
if inner < 0 { return -1; }
let cur_off = patparse_off.load(Ordering::Relaxed);
let p = patparse.lock().unwrap();
if cur_off >= p.len() || p.as_bytes()[cur_off] != b')' {
return -1;
}
drop(p);
patparse_off.fetch_add(1, Ordering::Relaxed);
let close_off = patnode(P_CLOSE + n as u8);
set_next(open_off, inner as usize);
pattail(open_off, close_off);
chain_branches_to(inner as usize, close_off);
*flagp &= !P_PURESTR;
*tail_out = close_off;
open_off as i64
}
b'\\' => {
patparse_off.fetch_add(1, Ordering::Relaxed);
let p = patparse.lock().unwrap();
let off2 = patparse_off.load(Ordering::Relaxed);
if off2 >= p.len() { return -1; }
let escaped = p.as_bytes()[off2];
drop(p);
patparse_off.fetch_add(1, Ordering::Relaxed);
*flagp |= P_SIMPLE;
let lit_off = patnode(P_EXACTLY);
let mut buf_lit = patout.lock().unwrap();
buf_lit.extend_from_slice(&1u32.to_le_bytes());
buf_lit.push(escaped);
*tail_out = lit_off;
lit_off as i64
}
b'<' => {
patparse_off.fetch_add(1, Ordering::Relaxed);
*flagp &= !P_PURESTR;
let parse_n = patparse.lock().unwrap();
let nb = parse_n.as_bytes();
let mut j = patparse_off.load(Ordering::Relaxed);
let mut len_flag: u8 = 0; let mut from: i64 = 0;
let lo_start = j;
while j < nb.len() && nb[j].is_ascii_digit() {
from = from * 10 + (nb[j] - b'0') as i64;
j += 1;
}
if j > lo_start { len_flag |= 1; } if j >= nb.len() || nb[j] != b'-' {
drop(parse_n);
return -1;
}
j += 1; let mut to: i64 = 0;
let hi_start = j;
while j < nb.len() && nb[j].is_ascii_digit() {
to = to * 10 + (nb[j] - b'0') as i64;
j += 1;
}
if j > hi_start { len_flag |= 2; } if j >= nb.len() || nb[j] != b'>' {
drop(parse_n);
return -1; }
j += 1;
drop(parse_n);
patparse_off.store(j, Ordering::Relaxed);
let off2 = match len_flag { 3 => { let off2 = patnode(P_NUMRNG);
let mut buf = patout.lock().unwrap();
buf.extend_from_slice(&from.to_le_bytes());
buf.extend_from_slice(&to.to_le_bytes());
off2
}
2 => { let off2 = patnode(P_NUMTO);
let mut buf = patout.lock().unwrap();
buf.extend_from_slice(&to.to_le_bytes());
off2
}
1 => { let off2 = patnode(P_NUMFROM);
let mut buf = patout.lock().unwrap();
buf.extend_from_slice(&from.to_le_bytes());
off2
}
_ => patnode(P_NUMANY), };
*tail_out = off2;
off2 as i64
}
_ => {
let mut buf: Vec<u8> = Vec::new();
let mut local_off = off;
let p = patparse.lock().unwrap();
while local_off < p.len() {
let b = p.as_bytes()[local_off];
if matches!(b, b'?'|b'*'|b'['|b'('|b')'|b'|'|b'\\'|b'#'|b'^'|b'<') {
break;
}
buf.push(b);
local_off += 1;
}
drop(p);
if buf.is_empty() {
return -1;
}
patparse_off.store(local_off, Ordering::Relaxed);
*flagp |= P_SIMPLE;
let lit_off = patnode(P_EXACTLY);
let mut buf_lit = patout.lock().unwrap();
let len = buf.len() as u32;
buf_lit.extend_from_slice(&len.to_le_bytes());
buf_lit.extend_from_slice(&buf);
*tail_out = lit_off;
lit_off as i64
}
};
if atom < 0 { return atom; }
let q_off = patparse_off.load(Ordering::Relaxed);
let parse2 = patparse.lock().unwrap();
if q_off < parse2.len() && parse2.as_bytes()[q_off] == b'#' {
let two = q_off + 1 < parse2.len() && parse2.as_bytes()[q_off + 1] == b'#';
drop(parse2);
let consume = if two { 2 } else { 1 };
patparse_off.fetch_add(consume, Ordering::Relaxed);
let quant_op = if two { P_TWOHASH } else { P_ONEHASH };
patinsert(quant_op, atom as usize, None, 0);
*flagp &= !P_PURESTR;
*tail_out = atom as usize;
}
atom
}
/// Stub: always returns -1 (the failure value used by the other compile
/// functions) and ignores both arguments.
#[allow(unused_variables)]
pub fn patcompnot(paren: i32, flagsp: &mut i32) -> i64 { -1
}
/// Parses a glob-flag group of the form `(#...)` at the start of `s`.
///
/// Returns `Some((bits, assertp, consumed))` where `bits` is the resulting
/// `GF_*` bit set (the low byte holds the `aN` error count), `assertp` is
/// `P_ISSTART`/`P_ISEND` when `s`/`e` was given (0 otherwise) and `consumed`
/// is the number of bytes up to and including the closing `)`.
///
/// Returns `None` when `s` does not start with `(#`, contains an unknown
/// flag letter, has an `a` count above 254, or lacks the closing `)`.
pub fn patgetglobflags(s: &str) -> Option<(i32, i64, usize)> {
    let bytes = s.as_bytes();
    if !s.starts_with("(#") {
        return None;
    }
    let mut i = 2;
    let mut bits: i32 = 0;
    let mut assertp: i64 = 0;
    while i < bytes.len() && bytes[i] != b')' {
        match bytes[i] {
            b'i' => { bits |= GF_IGNCASE; bits &= !GF_LCMATCHUC; i += 1; }
            b'I' => { bits &= !GF_IGNCASE; i += 1; }
            b'l' => { bits |= GF_LCMATCHUC; bits &= !GF_IGNCASE; i += 1; }
            b'L' => { bits &= !GF_LCMATCHUC; i += 1; }
            b'b' => { bits |= GF_BACKREF; i += 1; }
            b'B' => { bits &= !GF_BACKREF; i += 1; }
            b'm' => { bits |= GF_MATCHREF; i += 1; }
            b'M' => { bits &= !GF_MATCHREF; i += 1; }
            b's' => { assertp = P_ISSTART as i64; i += 1; }
            b'e' => { assertp = P_ISEND as i64; i += 1; }
            b'u' => { bits |= GF_MULTIBYTE; i += 1; }
            b'U' => { bits &= !GF_MULTIBYTE; i += 1; }
            b'a' => {
                i += 1;
                let mut errs: i32 = 0;
                while i < bytes.len() && bytes[i].is_ascii_digit() {
                    errs = errs * 10 + i32::from(bytes[i] - b'0');
                    // Reject as soon as the limit is exceeded; this keeps
                    // `errs` small so the multiplication can never overflow.
                    if errs > 254 {
                        return None;
                    }
                    i += 1;
                }
                bits = (bits & !0xff) | (errs & 0xff);
            }
            b'q' => {
                // `q` discards the rest of the group.
                while i < bytes.len() && bytes[i] != b')' {
                    i += 1;
                }
            }
            _ => return None,
        }
    }
    if i >= bytes.len() {
        return None;
    }
    Some((bits, assertp, i + 1))
}
/// Maps a POSIX class name (e.g. `"alpha"`) to its 1-based index in
/// `POSIX_CLASS_NAMES`; unknown names yield `None`.
pub fn range_type(name: &str) -> Option<usize> {
    for (idx, candidate) in POSIX_CLASS_NAMES.iter().enumerate() {
        if *candidate == name {
            return Some(idx + 1);
        }
    }
    None
}
/// Inverse of `range_type`: turns a 1-based class index into its
/// `[:name:]` spelling. Index 0 and out-of-range indices yield `None`.
pub fn pattern_range_to_string(idx: usize) -> Option<String> {
    let slot = idx.checked_sub(1)?;
    let name = POSIX_CLASS_NAMES.get(slot)?;
    Some(format!("[:{}:]", name))
}
// POSIX character class names. The position in this table (plus one) is the
// index returned by `range_type` and accepted by `pattern_range_to_string`,
// so the order must not change.
const POSIX_CLASS_NAMES: &[&str] = &[
"alpha", "alnum", "blank", "cntrl", "digit", "graph", "lower",
"print", "punct", "space", "upper", "xdigit",
];
/// No-op; takes no arguments and has an empty body.
pub fn clear_shiftstate() {}
/// Returns the byte offset of the character following the one at `pos`.
///
/// When `pos` is at or beyond the end of `s` it is returned unchanged. When
/// `pos` points into the middle of a multi-byte character the result is the
/// start of the next character, so callers always make progress instead of
/// panicking on a bad offset.
pub fn metacharinc(s: &str, pos: usize) -> usize {
    if pos >= s.len() {
        return pos;
    }
    // `s.len()` is always a character boundary, so the search cannot fail.
    (pos + 1..=s.len())
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(s.len())
}
/// Returns the character that starts at byte offset `pos`, or `None` when
/// `pos` is at/after the end of `s` or not on a character boundary.
pub fn charref(s: &str, pos: usize) -> Option<char> {
    // `str::get` performs the bounds and boundary checks without panicking.
    s.get(pos..)?.chars().next()
}
/// Alias for `metacharinc`: offset of the character after the one at `y`.
pub fn charnext(x: &str, y: usize) -> usize {
    let following = metacharinc(x, y);
    following
}
/// Reads the character at `*pos` and advances `*pos` past it.
///
/// Returns `None` (leaving `*pos` untouched) when `*pos` is at/after the end
/// of `s` or not on a character boundary.
pub fn charrefinc(s: &str, pos: &mut usize) -> Option<char> {
    let c = s.get(*pos..)?.chars().next()?;
    *pos += c.len_utf8();
    Some(c)
}
/// Returns the byte offset of the character that ends right before `y`
/// (i.e. steps one character backwards); 0 stays 0.
///
/// Offsets past the end of `x` or inside a multi-byte character no longer
/// panic: the result is the closest character start strictly below `y`.
pub fn charsub(x: &str, y: usize) -> usize {
    let upper = y.min(x.len());
    (0..upper)
        .rev()
        .find(|&i| x.is_char_boundary(i))
        .unwrap_or(0)
}
/// Per-match capture state threaded through `patmatch_internal`.
#[derive(Clone)]
#[allow(non_camel_case_types)]
pub struct rpat {
// `patbeginp[k]` / `patendp[k]`: byte offsets recorded when the matcher
// passes the open / close node of group `k + 1`.
// `captures_set`: bit `k` is set once the close node of group `k + 1` has
// been passed.
pub patbeginp: [usize; NSUBEXP], pub patendp: [usize; NSUBEXP], pub captures_set: u16, }
impl rpat {
    /// Creates an empty capture state: every start offset is `usize::MAX`,
    /// every end offset is 0 and no capture bit is set.
    fn new() -> Self {
        let patbeginp = [usize::MAX; NSUBEXP];
        let patendp = [0usize; NSUBEXP];
        Self {
            patbeginp,
            patendp,
            captures_set: 0,
        }
    }
}
/// Matches `prog` against the whole of `string`; shorthand for `pattrylen`
/// with the string's full byte length.
pub fn pattry(prog: &Patprog, string: &str) -> bool {
    let full_len = string.len();
    pattrylen(prog, string, full_len)
}
/// Matches `prog` against the first `len` bytes of `string`.
///
/// `len` is clamped to the string length and, if it falls inside a
/// multi-byte character, rounded down to the previous character boundary
/// (slicing at such an offset would otherwise panic).
///
/// Returns `true` when the program matches and either consumed the whole
/// trial string or the program's flags contain `PAT_NOANCH`/`PAT_NOTEND`.
pub fn pattrylen(prog: &Patprog, string: &str, len: usize) -> bool {
    let mut cut = len.min(string.len());
    // Offset 0 is always a boundary, so this loop terminates.
    while !string.is_char_boundary(cut) {
        cut -= 1;
    }
    let trial = &string[..cut];
    let mut state = rpat::new();
    match patmatch_internal(&prog.1, 0, trial, 0, &mut state, prog.0.flags) {
        Some(end_pos) => {
            let no_anchor = (prog.0.flags & (PAT_NOANCH | PAT_NOTEND) as i32) != 0;
            no_anchor || end_pos == trial.len()
        }
        None => false,
    }
}
/// Runs `prog` against `string` and reports the capture positions.
///
/// Always returns `Some`. On a match the tuple holds `true` and one
/// `(start, end)` byte-offset pair per group (at most `NSUBEXP`), with
/// `(0, 0)` for groups that did not participate; on failure it holds
/// `false` and an empty vector. Unlike `pattrylen`, no end-anchoring check
/// is applied.
pub fn pattryrefs(prog: &Patprog, string: &str) -> Option<(bool, Vec<(usize, usize)>)> {
    let mut captures = rpat::new();
    let matched = patmatch_internal(&prog.1, 0, string, 0, &mut captures, prog.0.flags);
    if matched.is_none() {
        return Some((false, Vec::new()));
    }

    let group_count = prog.0.patnpar as usize;
    let mut refs = Vec::with_capacity(group_count);
    for group in 0..group_count.min(NSUBEXP) {
        let was_set = captures.captures_set & (1u16 << group) != 0;
        let span = if was_set {
            (captures.patbeginp[group], captures.patendp[group])
        } else {
            (0, 0)
        };
        refs.push(span);
    }
    Some((true, refs))
}
/// Returns the byte offset at which a match of `prog` starting at the
/// beginning of `string` ends, or `None` when there is no match.
pub fn patmatchlen(prog: &Patprog, string: &str) -> Option<usize> {
    let (header, code) = (&prog.0, &prog.1);
    let mut captures = rpat::new();
    patmatch_internal(code, 0, string, 0, &mut captures, header.flags)
}
fn patmatch_internal(
code: &[u8],
prog_off: usize,
string: &str,
string_off: usize,
state: &mut rpat,
glob_flags: i32,
) -> Option<usize> { let mut scan = prog_off;
let mut s_off = string_off;
let mut glob_flags = glob_flags;
while scan < code.len() {
let op = code[scan + I_OP];
let next_bytes: [u8; 4] = code[scan + I_NEXT..scan + I_NEXT + 4].try_into().unwrap();
let next = u32::from_le_bytes(next_bytes) as usize;
match op {
P_END => return Some(s_off), P_NOTHING => { }
P_BACK => { }
P_EXACTLY => { let body = scan + I_BODY;
let len = u32::from_le_bytes(code[body..body + 4].try_into().unwrap()) as usize;
let str_bytes = &code[body + 4..body + 4 + len];
let input_bytes = string.as_bytes();
if s_off + len > input_bytes.len() { return None; }
let igncase = (glob_flags & (GF_IGNCASE | GF_LCMATCHUC)) != 0;
let multibyte = (glob_flags & GF_MULTIBYTE) != 0; if igncase {
let inp_slice = &input_bytes[s_off..s_off + len];
if multibyte {
let pat_str = std::str::from_utf8(str_bytes).ok();
let inp_str = std::str::from_utf8(inp_slice).ok();
if let (Some(p), Some(i)) = (pat_str, inp_str) {
let mut pc = p.chars();
let mut ic = i.chars();
loop {
match (pc.next(), ic.next()) {
(None, None) => break,
(Some(_), None) | (None, Some(_)) => return None,
(Some(a), Some(b)) => {
let af: String = a.to_lowercase().collect();
let bf: String = b.to_lowercase().collect();
if af != bf { return None; }
}
}
}
} else {
for k in 0..len {
if inp_slice[k].to_ascii_lowercase()
!= str_bytes[k].to_ascii_lowercase() { return None; }
}
}
} else {
for k in 0..len {
if inp_slice[k].to_ascii_lowercase()
!= str_bytes[k].to_ascii_lowercase() { return None; }
}
}
} else if &input_bytes[s_off..s_off + len] != str_bytes {
return None;
}
s_off += len;
}
P_ANY => { let s = &string[s_off..];
let c = s.chars().next()?;
s_off += c.len_utf8();
}
P_ANYOF => { let body = scan + I_BODY;
let len = u32::from_le_bytes(code[body..body + 4].try_into().unwrap()) as usize;
let set = &code[body + 4..body + 4 + len];
let input_bytes = string.as_bytes();
if s_off >= input_bytes.len() { return None; }
let b = input_bytes[s_off];
let igncase = (glob_flags & (GF_IGNCASE | GF_LCMATCHUC)) != 0;
let found = if igncase {
let lb = b.to_ascii_lowercase();
set.iter().any(|&c| c.to_ascii_lowercase() == lb)
} else {
set.contains(&b)
};
if !found { return None; }
s_off += 1;
}
P_ANYBUT => {
let body = scan + I_BODY;
let len = u32::from_le_bytes(code[body..body + 4].try_into().unwrap()) as usize;
let set = &code[body + 4..body + 4 + len];
let input_bytes = string.as_bytes();
if s_off >= input_bytes.len() { return None; }
let b = input_bytes[s_off];
let igncase = (glob_flags & (GF_IGNCASE | GF_LCMATCHUC)) != 0;
let found = if igncase {
let lb = b.to_ascii_lowercase();
set.iter().any(|&c| c.to_ascii_lowercase() == lb)
} else {
set.contains(&b)
};
if found { return None; }
s_off += 1;
}
P_STAR => { let input_bytes = string.as_bytes();
let max = input_bytes.len() - s_off;
let mut consumed = max;
loop {
let mut sub_state = state.clone();
if let Some(end) = patmatch_internal(code, next, string, s_off + consumed, &mut sub_state, glob_flags) {
*state = sub_state;
return Some(end);
}
if consumed == 0 { return None; }
consumed -= 1;
}
}
P_ONEHASH | P_TWOHASH => { let operand = scan + I_BODY;
let min = if op == P_TWOHASH { 1 } else { 0 };
let mut positions = vec![s_off];
loop {
let cur = *positions.last().unwrap();
let mut sub_state = state.clone();
if let Some(new_pos) = patmatch_internal(code, operand, string, cur, &mut sub_state, glob_flags) {
if new_pos == cur { break; } *state = sub_state;
positions.push(new_pos);
} else {
break;
}
}
if positions.len() - 1 < min { return None; }
while positions.len() > min {
let cur = *positions.last().unwrap();
let mut sub_state = state.clone();
if let Some(end) = patmatch_internal(code, next, string, cur, &mut sub_state, glob_flags) {
*state = sub_state;
return Some(end);
}
if positions.len() <= min + 1 { return None; }
positions.pop();
}
return None;
}
P_BRANCH => { let next_is_branch = next != 0
&& next < code.len()
&& (code[next + I_OP] == P_BRANCH
|| code[next + I_OP] == P_WBRANCH);
if !next_is_branch {
scan = scan + I_BODY;
continue;
}
let mut br = scan;
loop {
let br_next_bytes: [u8; 4] = code[br + I_NEXT..br + I_NEXT + 4]
.try_into().unwrap();
let br_next = u32::from_le_bytes(br_next_bytes) as usize;
let operand = br + I_BODY;
let mut sub_state = state.clone();
if let Some(end) = patmatch_internal(
code, operand, string, s_off, &mut sub_state, glob_flags
) {
*state = sub_state;
return Some(end);
}
if br_next == 0 { return None; }
let op_next = code[br_next + I_OP];
if op_next != P_BRANCH && op_next != P_WBRANCH {
return None;
}
br = br_next;
}
}
P_NUMRNG => { let body = scan + I_BODY;
let from = i64::from_le_bytes(code[body..body + 8].try_into().unwrap());
let to = i64::from_le_bytes(code[body + 8..body + 16].try_into().unwrap());
let input_bytes = string.as_bytes();
let start = s_off;
let mut k = start;
while k < input_bytes.len() && input_bytes[k].is_ascii_digit() {
k += 1;
}
if k == start { return None; }
let n: i64 = std::str::from_utf8(&input_bytes[start..k])
.ok()
.and_then(|s| s.parse::<i64>().ok())?;
if n < from || n > to { return None; }
s_off = k;
}
P_NUMFROM => {
let body = scan + I_BODY;
let from = i64::from_le_bytes(code[body..body + 8].try_into().unwrap());
let input_bytes = string.as_bytes();
let start = s_off;
let mut k = start;
while k < input_bytes.len() && input_bytes[k].is_ascii_digit() { k += 1; }
if k == start { return None; }
let n: i64 = std::str::from_utf8(&input_bytes[start..k])
.ok().and_then(|s| s.parse::<i64>().ok())?;
if n < from { return None; }
s_off = k;
}
P_NUMTO => {
let body = scan + I_BODY;
let to = i64::from_le_bytes(code[body..body + 8].try_into().unwrap());
let input_bytes = string.as_bytes();
let start = s_off;
let mut k = start;
while k < input_bytes.len() && input_bytes[k].is_ascii_digit() { k += 1; }
if k == start { return None; }
let n: i64 = std::str::from_utf8(&input_bytes[start..k])
.ok().and_then(|s| s.parse::<i64>().ok())?;
if n > to { return None; }
s_off = k;
}
P_NUMANY => {
let input_bytes = string.as_bytes();
let start = s_off;
while s_off < input_bytes.len() && input_bytes[s_off].is_ascii_digit() { s_off += 1; }
if s_off == start { return None; }
}
P_ISSTART => { if s_off != 0 { return None; }
}
P_ISEND => { if s_off < string.len() { return None; }
}
P_GFLAGS => { let body = scan + I_BODY;
let bits = i32::from_le_bytes(code[body..body + 4].try_into().unwrap());
glob_flags = (glob_flags
& !(GF_IGNCASE | GF_LCMATCHUC | GF_MULTIBYTE)) | bits;
}
P_COUNT => { let body = scan + I_BODY;
let min = i64::from_le_bytes(code[body..body + 8].try_into().unwrap());
let max = i64::from_le_bytes(code[body + 8..body + 16].try_into().unwrap());
let operand = body + 16;
let mut positions = vec![s_off];
let max_usize: i64 = max;
loop {
let cur = *positions.last().unwrap();
if (positions.len() as i64 - 1) >= max_usize { break; }
let mut sub_state = state.clone();
if let Some(new_pos) = patmatch_internal(code, operand, string, cur, &mut sub_state, glob_flags) {
if new_pos == cur { break; }
*state = sub_state;
positions.push(new_pos);
} else {
break;
}
}
let min_usize = min as usize;
if positions.len() < min_usize + 1 { return None; }
while positions.len() > min_usize {
let cur = *positions.last().unwrap();
let mut sub_state = state.clone();
if let Some(end) = patmatch_internal(code, next, string, cur, &mut sub_state, glob_flags) {
*state = sub_state;
return Some(end);
}
if positions.len() <= min_usize + 1 { return None; }
positions.pop();
}
return None;
}
op if op >= P_OPEN && op < P_CLOSE => { let n = (op - P_OPEN) as usize;
if n > 0 && n <= NSUBEXP {
state.patbeginp[n - 1] = s_off;
}
}
op if op >= P_CLOSE && op < 0xa0 => { let n = (op - P_CLOSE) as usize;
if n > 0 && n <= NSUBEXP {
state.patendp[n - 1] = s_off;
state.captures_set |= 1u16 << (n - 1);
}
}
_ => {
}
}
if next == 0 { break; }
scan = next;
}
Some(s_off)
}
/// Tests whether `ch` is matched by the bracket-expression contents in
/// `range`.
///
/// `range` is scanned left to right; a triple `lo '-' hi` is an inclusive
/// range, any other element is a literal. With `igncase` all characters are
/// compared after ASCII lower-casing.
pub fn patmatchrange(range: &[char], ch: char, igncase: bool) -> bool {
    let fold = |c: char| if igncase { c.to_ascii_lowercase() } else { c };
    let wanted = fold(ch);

    let mut idx = 0;
    while idx < range.len() {
        let is_span = idx + 2 < range.len() && range[idx + 1] == '-';
        if is_span {
            let (lo, hi) = (fold(range[idx]), fold(range[idx + 2]));
            if lo <= wanted && wanted <= hi {
                return true;
            }
            idx += 3;
        } else {
            if fold(range[idx]) == wanted {
                return true;
            }
            idx += 1;
        }
    }
    false
}
/// Returns the `idx`-th (0-based) character of the set described by
/// `range`, where a `lo '-' hi` triple contributes every code point from
/// `lo` to `hi` and any other element contributes itself.
///
/// Yields `None` when `idx` is beyond the set, or when the selected code
/// point inside a range is not a valid `char`.
pub fn patmatchindex(range: &[char], idx: usize) -> Option<char> {
    // Number of set members still to be skipped before the wanted one.
    let mut remaining = idx;
    let mut pos = 0;
    while pos < range.len() {
        let is_span = pos + 2 < range.len() && range[pos + 1] == '-';
        if is_span {
            let lo = range[pos] as u32;
            let hi = range[pos + 2] as u32;
            let span_len = if hi >= lo { (hi - lo) as usize + 1 } else { 0 };
            if remaining < span_len {
                return char::from_u32(lo + remaining as u32);
            }
            remaining -= span_len;
            pos += 3;
        } else {
            if remaining == 0 {
                return Some(range[pos]);
            }
            remaining -= 1;
            pos += 1;
        }
    }
    None
}
/// Multibyte-named entry point; forwards unchanged to `patmatchrange`.
pub fn mb_patmatchrange(range: &[char], ch: char, igncase: bool) -> bool {
    let matched = patmatchrange(range, ch, igncase);
    matched
}
/// Multibyte-named entry point; forwards unchanged to `patmatchindex`.
pub fn mb_patmatchindex(range: &[char], idx: usize) -> Option<char> {
    let picked = patmatchindex(range, idx);
    picked
}
/// No-op; takes no arguments and has an empty body.
pub fn pattrystart() {}
/// Returns an owned copy of `s`; no transformation is applied.
pub fn patmungestring(s: &str) -> String {
    String::from(s)
}
/// Returns an owned copy of `s`.
pub fn patallocstr(s: &str) -> String {
    s.to_owned()
}
// Names of currently disabled pattern features; edited by `pat_enables` and
// saved/restored by the pattern-scope functions in this file.
pub static patterndisables: Mutex<Vec<String>> = Mutex::new(Vec::new());
/// Opens a pattern scope: pushes a snapshot of the current disable list
/// onto this thread's scope stack.
pub fn startpatternscope() {
    // The lock is a temporary of this statement and is released before the
    // thread-local stack is touched.
    let snapshot = patterndisables.lock().unwrap().to_vec();
    PATSCOPE_STACK.with(|stack| {
        stack.borrow_mut().push(snapshot);
    });
}
// Per-thread stack of disable-list snapshots: `startpatternscope` pushes,
// `endpatternscope` pops.
thread_local! {
static PATSCOPE_STACK: std::cell::RefCell<Vec<Vec<String>>> =
const { std::cell::RefCell::new(Vec::new()) };
}
/// Closes the innermost pattern-disable scope opened on this thread.
///
/// Pops the most recent snapshot from `PATSCOPE_STACK` and, if there was one,
/// replaces the contents of `patterndisables` with it. Does nothing when the
/// stack is empty.
///
/// # Panics
/// Panics if the `patterndisables` mutex is poisoned.
pub fn endpatternscope() {
    let popped = PATSCOPE_STACK.with(|stack| stack.borrow_mut().pop());
    match popped {
        Some(saved) => {
            let mut guard = patterndisables.lock().unwrap();
            *guard = saved;
        }
        None => {}
    }
}
/// Returns a copy of the current `patterndisables` list.
///
/// # Panics
/// Panics if the `patterndisables` mutex is poisoned.
pub fn savepatterndisables() -> Vec<String> {
    let guard = patterndisables.lock().unwrap();
    guard.clone()
}
/// Replaces the whole `patterndisables` list with `disables`.
///
/// # Panics
/// Panics if the `patterndisables` mutex is poisoned.
pub fn restorepatterndisables(disables: Vec<String>) {
    let mut guard = patterndisables.lock().unwrap();
    *guard = disables;
}
/// Empties the `patterndisables` list.
///
/// # Panics
/// Panics if the `patterndisables` mutex is poisoned.
pub fn clearpatterndisables() {
    let mut guard = patterndisables.lock().unwrap();
    guard.clear();
}
/// Consumes `prog` and does nothing else.
///
/// The body is empty; because the program is taken by value it is dropped
/// when this function returns.
// The `allow` silences the unused-parameter warning for `prog`.
#[allow(unused_variables)]
pub fn freepatprog(prog: Patprog) {}
/// Enables or disables the pattern names listed in `patp`.
///
/// * `cmd` — accepted but not used by this implementation.
/// * `patp` — names to act on.
/// * `enable` — when `true`, every entry of `patterndisables` equal to one of
///   the names is removed; when `false`, each name not already present is
///   appended, in the order given.
///
/// Always returns `0`.
///
/// # Panics
/// Panics if the `patterndisables` mutex is poisoned.
#[allow(unused_variables)]
pub fn pat_enables(cmd: &str, patp: &[&str], enable: bool) -> i32 {
    let mut table = patterndisables.lock().unwrap();
    if enable {
        // Enabling a name means dropping it from the disabled list.
        table.retain(|entry| !patp.iter().any(|&name| entry == name));
    } else {
        for &name in patp {
            let already_listed = table.iter().any(|entry| entry == name);
            if !already_listed {
                table.push(name.to_string());
            }
        }
    }
    0
}
/// One-shot convenience matcher: compiles `pattern` with `PAT_HEAPDUP` and
/// tries it against `text`.
///
/// Returns the result of `pattry` on the compiled program, or `false` when
/// `patcompile` yields `None`.
pub fn patmatch(pattern: &str, text: &str) -> bool {
    patcompile(pattern, PAT_HEAPDUP as i32, None)
        .map_or(false, |prog| pattry(&prog, text))
}
/// Counts how many times `prog` matches back-to-back from the start of `s`.
///
/// Starting at offset 0, the program is run with `patmatch_internal` on a
/// fresh `rpat` state; each match that ends strictly after the current offset
/// advances the offset to that end and bumps the count. Counting stops at the
/// end of `s`, when `max` matches have been counted (`None` means no limit),
/// or at the first attempt that fails or does not advance.
pub fn patrepeat(prog: &Patprog, s: &str, max: Option<usize>) -> usize {
    let limit = max.unwrap_or(usize::MAX);
    let mut matched = 0;
    let mut offset = 0;
    loop {
        if offset >= s.len() || matched >= limit {
            break;
        }
        // Each attempt gets its own scratch state.
        let mut state = rpat::new();
        let advanced = patmatch_internal(&prog.1, 0, s, offset, &mut state, prog.0.flags);
        match advanced {
            Some(next) if next > offset => {
                offset = next;
                matched += 1;
            }
            // No match, or a zero-width match that would never terminate.
            _ => break,
        }
    }
    matched
}
/// Reports whether `str` contains at least one of the characters
/// `*`, `?`, `[`, `\`, `(`, `|`, `<`, `#` or `^`.
pub fn haswilds(str: &str) -> bool {
    const META: [char; 9] = ['*', '?', '[', '\\', '(', '|', '<', '#', '^'];
    str.contains(&META[..])
}
/// Alternate spelling of [`Patprog`], marked deprecated in favour of that name.
#[deprecated(note = "use Patprog instead")]
pub type PatProg = Patprog;
/// Finds every `<lo-hi>` numeric-range token in `s`.
///
/// A token is `<`, optional ASCII digits, `-`, optional ASCII digits, `>`.
/// Each hit is reported as `(start, end, lo, hi)` where `start..end` is the
/// byte span of the token in `s` (end exclusive) and `lo` / `hi` are the
/// parsed bounds, `None` when the corresponding digits are absent.
///
/// A bound with too many digits to fit in `i64` saturates to `i64::MAX`
/// rather than being reported as `None`, so an overflowing bound is never
/// mistaken for a missing one.
///
/// Tokens are reported left to right and never overlap; a `<` that does not
/// start a well-formed token is skipped.
pub fn extract_numeric_ranges(s: &str) -> Vec<(usize, usize, Option<i64>, Option<i64>)> {
    let bytes = s.as_bytes();
    let mut out = Vec::new();
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'<' {
            let (lo, after_lo) = scan_numeric_bound(bytes, i + 1);
            if bytes.get(after_lo) == Some(&b'-') {
                let (hi, after_hi) = scan_numeric_bound(bytes, after_lo + 1);
                if bytes.get(after_hi) == Some(&b'>') {
                    out.push((i, after_hi + 1, lo, hi));
                    // Resume right after the closing `>`.
                    i = after_hi + 1;
                    continue;
                }
            }
        }
        i += 1;
    }
    out
}

/// Reads a run of ASCII digits from `bytes` starting at `from`.
///
/// Returns the parsed value (`None` for an empty run, saturated at
/// `i64::MAX` on overflow) and the index of the first byte after the run.
fn scan_numeric_bound(bytes: &[u8], from: usize) -> (Option<i64>, usize) {
    let mut end = from;
    let mut value: Option<i64> = None;
    while let Some(&b) = bytes.get(end) {
        if !b.is_ascii_digit() {
            break;
        }
        let digit = i64::from(b - b'0');
        value = Some(value.unwrap_or(0).saturating_mul(10).saturating_add(digit));
        end += 1;
    }
    (value, end)
}
/// Returns a copy of `s` in which every numeric-range token found by
/// `extract_numeric_ranges` is replaced by a single `*`; all other text is
/// copied through unchanged.
pub fn numeric_ranges_to_star(s: &str) -> String {
    let mut rebuilt = String::with_capacity(s.len());
    // Byte offset in `s` of the first character not yet copied.
    let mut cursor = 0;
    for span in extract_numeric_ranges(s) {
        let (begin, finish) = (span.0, span.1);
        rebuilt += &s[cursor..begin];
        rebuilt.push('*');
        cursor = finish;
    }
    rebuilt += &s[cursor..];
    rebuilt
}
/// Tests whether `n` lies inside the inclusive range `lo..=hi`.
///
/// A bound of `None` is treated as unbounded on that side, so
/// `(None, None)` accepts every `n`.
pub fn numeric_range_contains(lo: Option<i64>, hi: Option<i64>, n: i64) -> bool {
    let above_floor = match lo {
        Some(floor) => n >= floor,
        None => true,
    };
    let below_ceiling = match hi {
        Some(ceiling) => n <= ceiling,
        None => true,
    };
    above_floor && below_ceiling
}
// Unit tests for the pattern compiler and matcher: literals, `*`/`?`,
// bracket expressions, alternation, captures, `#`/`##` closures, numeric
// ranges, glob flags, anchors and `(#cN,M)` counts.
#[cfg(test)]
mod tests {
use super::*;
// Held by `compile` (and `convenience_patmatch`) while a pattern is compiled.
static TEST_MUTEX: std::sync::Mutex<()> = std::sync::Mutex::new(());
// Compiles `p` with `PAT_HEAPDUP` while holding `TEST_MUTEX`; panics with
// "compile failed" if `patcompile` returns `None`. A poisoned mutex is
// recovered with `into_inner`, so one panicking test does not break the rest.
// The guard is dropped on return, so it covers compilation only.
fn compile(p: &str) -> Patprog {
let _g = TEST_MUTEX.lock().unwrap_or_else(|e| e.into_inner());
patcompile(p, PAT_HEAPDUP as i32, None).expect("compile failed")
}
// A plain string matches itself and nothing else.
#[test]
fn literal_match() {
let prog = compile("hello");
assert!(pattry(&prog, "hello"));
assert!(!pattry(&prog, "world"));
}
// `*` accepts the empty string as well as arbitrary text.
#[test]
fn star_matches_anything() {
let prog = compile("*");
assert!(pattry(&prog, ""));
assert!(pattry(&prog, "abc"));
}
// `*` between literals may match zero or more characters; the trailing
// literal is still required.
#[test]
fn star_in_middle() {
let prog = compile("a*z");
assert!(pattry(&prog, "az"));
assert!(pattry(&prog, "abz"));
assert!(pattry(&prog, "aXYZz"));
assert!(!pattry(&prog, "ab"));
}
// `?` consumes exactly one character.
#[test]
fn question_matches_one() {
let prog = compile("a?c");
assert!(pattry(&prog, "abc"));
assert!(pattry(&prog, "axc"));
assert!(!pattry(&prog, "ac"));
}
// `[abc]` accepts any listed character.
#[test]
fn bracket_anyof() {
let prog = compile("[abc]");
assert!(pattry(&prog, "a"));
assert!(pattry(&prog, "b"));
assert!(pattry(&prog, "c"));
assert!(!pattry(&prog, "d"));
}
// `[a-z]` is case-sensitive by default.
#[test]
fn bracket_range() {
let prog = compile("[a-z]");
assert!(pattry(&prog, "m"));
assert!(!pattry(&prog, "M"));
}
// A leading `^` inside brackets negates the set.
#[test]
fn bracket_negated() {
let prog = compile("[^0-9]");
assert!(pattry(&prog, "a"));
assert!(!pattry(&prog, "5"));
}
// Top-level `|` chooses between whole alternatives.
#[test]
fn alternation() {
let prog = compile("foo|bar");
assert!(pattry(&prog, "foo"));
assert!(pattry(&prog, "bar"));
assert!(!pattry(&prog, "baz"));
}
// `pattryrefs` reports one `(start, end)` pair per parenthesised group.
#[test]
fn captures() {
let prog = compile("(foo)(bar)");
let (ok, refs) = pattryrefs(&prog, "foobar").unwrap();
assert!(ok);
assert_eq!(refs.len(), 2);
assert_eq!(refs[0], (0, 3));
assert_eq!(refs[1], (3, 6));
}
// `x#` is zero-or-more repetitions of `x`.
#[test]
fn hash_zero_or_more() {
let prog = compile("a#");
assert!(pattry(&prog, ""));
assert!(pattry(&prog, "a"));
assert!(pattry(&prog, "aaa"));
}
// `x##` is one-or-more repetitions of `x`.
#[test]
fn double_hash_one_or_more() {
let prog = compile("a##");
assert!(!pattry(&prog, ""));
assert!(pattry(&prog, "a"));
assert!(pattry(&prog, "aaa"));
}
// A backslash makes the following metacharacter literal.
#[test]
fn escape_literal() {
let prog = compile("a\\*b");
assert!(pattry(&prog, "a*b"));
assert!(!pattry(&prog, "azb"));
}
// `patmatch` compiles and matches in one call; `TEST_MUTEX` is held for the
// whole test here because `compile` is not used.
#[test]
fn convenience_patmatch() {
let _g = TEST_MUTEX.lock().unwrap_or_else(|e| e.into_inner());
assert!(patmatch("hello*", "hello world"));
assert!(!patmatch("x?z", "abc"));
}
// Eight threads each run 200 rounds of `patmatch` without taking
// `TEST_MUTEX`; every match must still succeed. `let _ = i;` only keeps the
// closure parameter used.
#[test]
fn patcompile_concurrent_safe() {
use std::thread;
let handles: Vec<_> = (0..8).map(|i| {
thread::spawn(move || {
for _ in 0..200 {
assert!(patmatch(":completion:*", ":completion:zsh"));
assert!(patmatch("hello*", "hello world"));
let _ = i;
}
})
}).collect();
for h in handles { h.join().unwrap(); }
}
#[test]
fn haswilds_detects_meta() {
assert!(haswilds("*"));
assert!(haswilds("foo?"));
assert!(haswilds("[abc]"));
assert!(!haswilds("plain"));
}
// Two adjacent `lo-hi` spans in one range body.
#[test]
fn patmatchrange_basic() {
let r: Vec<char> = "a-zA-Z".chars().collect();
assert!(patmatchrange(&r, 'm', false));
assert!(patmatchrange(&r, 'X', false));
assert!(!patmatchrange(&r, '5', false));
}
// `range_type` maps a class name to a numeric id, `None` for unknown names.
#[test]
fn range_type_lookup() {
assert_eq!(range_type("alpha"), Some(1));
assert_eq!(range_type("digit"), Some(5));
assert_eq!(range_type("nonsense"), None);
}
// `pattern_range_to_string` turns an id back into `[:name:]`; id 0 has no name.
#[test]
fn pattern_range_to_string_reverses() {
assert_eq!(pattern_range_to_string(1), Some("[:alpha:]".to_string()));
assert_eq!(pattern_range_to_string(0), None);
}
// The first tuple element carries the flag bits; the third is 4 for
// "(#i)foo" — presumably the length of the consumed "(#i)" prefix (confirm
// against `patgetglobflags`).
#[test]
fn patgetglobflags_case_insensitive() {
let (bits, _, n) = patgetglobflags("(#i)foo").unwrap();
assert!((bits & GF_IGNCASE) != 0);
assert_eq!(n, 4); }
#[test]
fn patgetglobflags_backref() {
let (bits, _, _) = patgetglobflags("(#b)").unwrap();
assert!((bits & GF_BACKREF) != 0);
}
// For `(#a2)` the low byte of the returned bits equals 2.
#[test]
fn patgetglobflags_approx() {
let (bits, _, _) = patgetglobflags("(#a2)").unwrap();
assert_eq!(bits & 0xff, 2);
}
#[test]
fn pattry_no_anchor_default() {
let prog = compile("foo");
assert!(pattry(&prog, "foo"));
}
// `<lo-hi>` is inclusive at both ends.
#[test]
fn numeric_range_inclusive() {
let prog = compile("<10-20>");
assert!(pattry(&prog, "15"));
assert!(pattry(&prog, "10"));
assert!(pattry(&prog, "20"));
assert!(!pattry(&prog, "9"));
assert!(!pattry(&prog, "21"));
}
// `<lo->` has no upper bound.
#[test]
fn numeric_range_from_only() {
let prog = compile("<100->");
assert!(pattry(&prog, "100"));
assert!(pattry(&prog, "9999"));
assert!(!pattry(&prog, "99"));
}
// `<-hi>` has no lower bound.
#[test]
fn numeric_range_to_only() {
let prog = compile("<-5>");
assert!(pattry(&prog, "0"));
assert!(pattry(&prog, "5"));
assert!(!pattry(&prog, "6"));
}
// `<->` accepts any run of digits but not letters.
#[test]
fn numeric_range_any() {
let prog = compile("<->");
assert!(pattry(&prog, "0"));
assert!(pattry(&prog, "12345"));
assert!(!pattry(&prog, "abc"));
}
// `#` applied to a parenthesised group repeats the whole group.
#[test]
fn group_with_hash_quantifier() {
let prog = compile("(foo)#");
assert!(pattry(&prog, ""));
assert!(pattry(&prog, "foo"));
assert!(pattry(&prog, "foofoofoo"));
}
// `##` applied to an alternation group: each repetition may pick either arm.
#[test]
fn group_alt_with_double_hash() {
let prog = compile("(a|b)##");
assert!(!pattry(&prog, ""));
assert!(pattry(&prog, "a"));
assert!(pattry(&prog, "abab"));
}
// A numeric range following a literal prefix.
#[test]
fn literal_then_numeric_range() {
let prog = compile("v<1-99>");
assert!(pattry(&prog, "v1"));
assert!(pattry(&prog, "v50"));
assert!(pattry(&prog, "v99"));
assert!(!pattry(&prog, "v100"));
assert!(!pattry(&prog, "v0"));
}
// `*` must give characters back so the literal suffix can match.
#[test]
fn star_greedy_backtracks() {
let prog = compile("*.txt");
assert!(pattry(&prog, "foo.txt"));
assert!(pattry(&prog, "a.b.c.txt"));
assert!(!pattry(&prog, "foo.txx"));
}
// POSIX class inside brackets, repeated one or more times.
#[test]
fn posix_alpha_class() {
let prog = compile("[[:alpha:]]##");
assert!(pattry(&prog, "abc"));
assert!(pattry(&prog, "XYZ"));
assert!(!pattry(&prog, "1"));
assert!(!pattry(&prog, ""));
}
// `(#i)` makes the following literal case-insensitive.
#[test]
fn case_insensitive_via_glob_flag() {
let prog = compile("(#i)foo");
assert!(pattry(&prog, "foo"));
assert!(pattry(&prog, "FOO"));
assert!(pattry(&prog, "Foo"));
assert!(pattry(&prog, "fOo"));
}
// `(#i)` also applies to bracket expressions.
#[test]
fn case_insensitive_bracket() {
let prog = compile("(#i)[abc]");
assert!(pattry(&prog, "A"));
assert!(pattry(&prog, "b"));
assert!(!pattry(&prog, "d"));
}
// Case-insensitive matching is expected to fold non-ASCII letters too.
#[test]
fn case_insensitive_unicode() {
let prog = compile("(#i)Über");
assert!(pattry(&prog, "über"));
assert!(pattry(&prog, "ÜBER"));
let prog2 = compile("(#i)café");
assert!(pattry(&prog2, "CAFÉ"));
assert!(pattry(&prog2, "Café"));
}
#[test]
fn case_sensitive_default() {
let prog = compile("foo");
assert!(pattry(&prog, "foo"));
assert!(!pattry(&prog, "FOO"));
}
// A glob flag in the middle of a pattern affects only what follows it:
// "foo" stays case-sensitive, "bar" does not.
#[test]
fn mid_pattern_gflags_switch() {
let prog = compile("foo(#i)bar");
assert!(pattry(&prog, "fooBAR"));
assert!(pattry(&prog, "foobar"));
assert!(pattry(&prog, "fooBaR"));
assert!(!pattry(&prog, "FOOBAR"));
}
// `(#s)` at the start of the pattern still lets the literal match.
#[test]
fn start_anchor() {
let prog = compile("(#s)foo");
assert!(pattry(&prog, "foo"));
}
// `(#e)` at the end of the pattern still lets the literal match.
#[test]
fn end_anchor() {
let prog = compile("foo(#e)");
assert!(pattry(&prog, "foo"));
}
// In this port `(#c3,5)x` requires between three and five `x`, inclusive.
#[test]
fn count_range_3_to_5() {
let prog = compile("(#c3,5)x");
assert!(!pattry(&prog, "xx"));
assert!(pattry(&prog, "xxx"));
assert!(pattry(&prog, "xxxx"));
assert!(pattry(&prog, "xxxxx"));
assert!(!pattry(&prog, "xxxxxx"));
}
// `(#c3)x` requires exactly three `x`.
#[test]
fn count_exact_3() {
let prog = compile("(#c3)x");
assert!(!pattry(&prog, "xx"));
assert!(pattry(&prog, "xxx"));
assert!(!pattry(&prog, "xxxx"));
}
// NOTE(review): looks like a leftover debugging aid — it dumps the compiled
// bytecode and the raw `patmatch_internal` result to stderr, and its only
// assertion is repeated in `captures_unmatched_group_returns_no_match`.
#[test]
fn debug_alt_b() {
let prog = compile("(a)|b");
eprintln!("bytecode len: {}", prog.1.len());
for (i, b) in prog.1.iter().enumerate() {
eprintln!(" [{:3}] {:#04x}", i, b);
}
let mut state = rpat::new();
let r = super::patmatch_internal(&prog.1, 0, "b", 0, &mut state, prog.0.flags);
eprintln!("match result: {:?}", r);
assert!(pattry(&prog, "b"));
}
// `(#c2,)x` has a minimum of two and no maximum.
#[test]
fn count_min_only() {
let prog = compile("(#c2,)x");
assert!(!pattry(&prog, "x"));
assert!(pattry(&prog, "xx"));
assert!(pattry(&prog, "xxxxxxxx"));
}
// Both arms of `(a)|b` must match, including the arm without a group.
// NOTE(review): despite the name, no capture data is inspected here.
#[test]
fn captures_unmatched_group_returns_no_match() {
let prog = compile("(a)|b");
assert!(pattry(&prog, "a"));
assert!(pattry(&prog, "b"));
}
}