/// Numeric opcode tags for pattern operations.
///
/// Every variant carries a fixed `u8` discriminant (`#[repr(u8)]`). In this file the
/// enum is only used by `patgetglobflags`, which reports `IsStart` / `IsEnd` for the
/// `s` and `e` flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum PatOp {
    End = 0x00,
    ExcSync = 0x01,
    ExcEnd = 0x02,
    Back = 0x03,
    Exactly = 0x04,
    Nothing = 0x05,
    OneHash = 0x06,
    TwoHash = 0x07,
    GFlags = 0x08,
    IsStart = 0x09,
    IsEnd = 0x0a,
    CountStart = 0x0b,
    Count = 0x0c,
    Branch = 0x20,
    WBranch = 0x21,
    Exclude = 0x30,
    ExcludP = 0x31,
    Any = 0x40,
    AnyOf = 0x41,
    AnyBut = 0x42,
    Star = 0x43,
    NumRng = 0x44,
    NumFrom = 0x45,
    NumTo = 0x46,
    NumAny = 0x47,
    Open = 0x80,
    Close = 0x90,
}
/// Number of capture-group slots kept by `PatMatcher`; groups numbered above this are not recorded.
const NSUBEXP: usize = 9;
/// Compile-time options attached to a pattern program.
#[derive(Debug, Clone, Copy, Default)]
pub struct PatFlags {
    /// File-name mode: `*` and `?` never match across a `/`.
    pub file: bool,
    /// Not consulted by the code in this file.
    pub any: bool,
    /// Unanchored: the `End` node succeeds even when input remains.
    pub noanch: bool,
    /// When set, the matcher rejects any input that starts with `.`.
    pub nogld: bool,
    /// Set by the compiler when the pattern contains no pattern characters.
    pub pures: bool,
    /// Not consulted by the code in this file.
    pub scan: bool,
    /// Not consulted by the code in this file.
    pub lcmatchuc: bool,
}
/// Globbing flags that can be set by `(#...)` flag groups (see `patgetglobflags`).
#[derive(Debug, Clone, Copy, Default)]
pub struct GlobFlags {
    /// ASCII case-insensitive matching of literals and bracket sets.
    pub igncase: bool,
    /// Set by the `l` flag; the matcher in this file does not read it.
    pub lcmatchuc: bool,
    /// Set by `m`, cleared by `M`; the matcher in this file does not read it.
    pub matchref: bool,
    /// Set by `b`, cleared by `B`; the matcher in this file does not read it.
    pub backref: bool,
    /// Set by `u`, cleared by `U`; the matcher in this file does not read it.
    pub multibyte: bool,
    /// Error count parsed from the `a<num>` flag; the matcher in this file does not read it.
    pub approx: u8,
}
/// A compiled pattern: built by `PatCompiler::compile`, executed by `PatMatcher`.
#[derive(Debug, Clone)]
pub struct PatProg {
// Node list the matcher walks in order.
code: Vec<PatNode>,
/// Compile-time flags; `pures` is set when the pattern was a plain literal.
pub flags: PatFlags,
/// Glob flags the matcher starts out with.
pub glob_start: GlobFlags,
/// The compiler's glob flags when compilation finished (the compiler in this file
/// always stores the same value as `glob_start`).
pub glob_end: GlobFlags,
/// Number of plain `(`...`)` groups counted while compiling.
pub npar: usize,
/// First character of the leading literal, when the program starts with one.
pub start_char: Option<char>,
/// The entire pattern when it had no pattern characters; the matcher then compares
/// strings directly instead of walking `code`.
pub pure_string: Option<String>,
}
/// One instruction of a compiled pattern, interpreted by `PatMatcher::match_node`.
#[derive(Debug, Clone)]
pub enum PatNode {
    /// Succeeds at end of input, or anywhere when the program is unanchored.
    End,
    /// Treated as an always-successful no-op by the matcher.
    ExcSync,
    /// Treated as an always-successful no-op by the matcher.
    ExcEnd,
    /// Treated as an always-successful no-op by the matcher.
    Back(usize),
    /// Literal text.
    Exactly(String),
    /// Always succeeds and consumes nothing.
    Nothing,
    /// Zero or more greedy repetitions of the inner node.
    OneHash(Box<PatNode>),
    /// One or more greedy repetitions of the inner node.
    TwoHash(Box<PatNode>),
    /// Replaces the matcher's active glob flags.
    GFlags(GlobFlags),
    /// Succeeds only at input offset 0.
    IsStart,
    /// Succeeds only at end of input.
    IsEnd,
    /// Treated as an always-successful no-op by the matcher.
    CountStart,
    /// Repeats `node` at most `max` times (unbounded when `None`); succeeds when at
    /// least `min` repetitions matched.
    Count {
        min: u32,
        max: Option<u32>,
        node: Box<PatNode>,
    },
    /// Alternatives tried in order; the `usize` is ignored by the matcher.
    Branch(Vec<PatNode>, usize),
    /// Like `Branch`, but an alternative only counts if it consumed input.
    WBranch(Vec<PatNode>),
    /// Succeeds, consuming nothing, when the inner nodes do NOT match here.
    Exclude(Vec<PatNode>),
    /// Handled identically to `Exclude` by the matcher.
    ExcludP(Vec<PatNode>),
    /// Any single character (never `/` in file mode).
    Any,
    /// One character that is in the set.
    AnyOf(Vec<char>),
    /// One character that is not in the set.
    AnyBut(Vec<char>),
    /// Any run of characters (stops at `/` in file mode).
    Star,
    /// A run of decimal digits whose value lies in the inclusive range.
    NumRng(i64, i64),
    /// A run of decimal digits whose value is at least the bound.
    NumFrom(i64),
    /// A run of decimal digits whose value is at most the bound.
    NumTo(i64),
    /// Any run of decimal digits that parses as an `i64`.
    NumAny,
    /// Records the start offset of capture group `n`.
    Open(usize),
    /// Records the end offset of capture group `n`.
    Close(usize),
    /// Nodes matched one after another.
    Sequence(Vec<PatNode>),
}
/// Single-pass parser that turns pattern text into `PatNode`s.
struct PatCompiler<'a> {
// Pattern text being compiled.
input: &'a str,
// Byte offset of the next unread character in `input`.
pos: usize,
// Flags copied into the resulting program.
flags: PatFlags,
// Glob flags copied into the program's `glob_start` / `glob_end`.
glob_flags: GlobFlags,
// Number of plain `(`...`)` groups opened so far.
npar: usize,
// Enables `#`, `^`, `~` and `<...>` handling.
extended_glob: bool,
// Enables `*(...)`, `?(...)`, `+(...)`, `!(...)` and `@(...)` handling.
ksh_glob: bool,
}
impl<'a> PatCompiler<'a> {
fn new(input: &'a str, flags: PatFlags) -> Self {
PatCompiler {
input,
pos: 0,
flags,
glob_flags: GlobFlags::default(),
npar: 0,
extended_glob: true,
ksh_glob: true,
}
}
fn with_options(mut self, extended: bool, ksh: bool) -> Self {
self.extended_glob = extended;
self.ksh_glob = ksh;
self
}
fn with_igncase(mut self, igncase: bool) -> Self {
self.glob_flags.igncase = igncase;
self
}
/// Returns the next unread character without consuming it, or `None` at end of input.
fn peek(&self) -> Option<char> {
    let unread = &self.input[self.pos..];
    let mut upcoming = unread.chars();
    upcoming.next()
}
/// Returns the character `n` characters ahead of the cursor (`n == 0` is the next one)
/// without consuming anything.
fn peek_n(&self, n: usize) -> Option<char> {
    let unread = &self.input[self.pos..];
    let mut upcoming = unread.chars();
    upcoming.nth(n)
}
/// Consumes and returns the next character, moving the cursor past its UTF-8 encoding.
/// Returns `None` (and moves nothing) at end of input.
fn advance(&mut self) -> Option<char> {
    let next = self.peek();
    if let Some(ch) = next {
        self.pos += ch.len_utf8();
    }
    next
}
/// True once the cursor has reached (or passed) the end of the pattern text.
fn at_end(&self) -> bool {
    let total = self.input.len();
    total <= self.pos
}
/// Compiles the whole pattern into a [`PatProg`].
///
/// A pattern without any pattern characters takes a fast path: the program is a single
/// literal, `flags.pures` is set and `pure_string` holds the text so the matcher can
/// compare strings directly.
///
/// # Errors
/// Propagates the messages produced by the sub-parsers, and returns `"unmatched )"`
/// when a `)` appears with no group open.
fn compile(mut self) -> Result<PatProg, String> {
    if !self.has_pattern_chars() {
        let literal = self.input.to_string();
        return Ok(PatProg {
            code: vec![PatNode::Exactly(literal.clone()), PatNode::End],
            flags: PatFlags {
                pures: true,
                ..self.flags
            },
            glob_start: self.glob_flags,
            glob_end: self.glob_flags,
            npar: 0,
            start_char: self.input.chars().next(),
            pure_string: Some(literal),
        });
    }
    let nodes = self.compile_branch()?;
    // The branch parser stops in front of a `)` so that group parsers can consume it.
    // At top level no group is open, so leftover input means an unbalanced `)`;
    // accepting it would silently drop the rest of the pattern.
    if !self.at_end() {
        return Err("unmatched )".to_string());
    }
    let start_char = self.find_start_char(&nodes);
    Ok(PatProg {
        code: nodes,
        flags: self.flags,
        glob_start: self.glob_flags,
        glob_end: self.glob_flags,
        npar: self.npar,
        start_char,
        pure_string: None,
    })
}
/// Reports whether the pattern contains any character that is special under the
/// currently enabled glob options.
fn has_pattern_chars(&self) -> bool {
    let extended = self.extended_glob;
    let ksh = self.ksh_glob;
    self.input.chars().any(|c| match c {
        '*' | '?' | '[' | '\\' => true,
        '#' | '^' | '~' | '<' | '>' => extended,
        '(' | ')' | '|' => ksh,
        _ => false,
    })
}
/// Returns the first character every match must start with, when that can be read
/// off the program: the first node is a literal, or a sequence whose first node is
/// a literal. Anything else yields `None`.
fn find_start_char(&self, nodes: &[PatNode]) -> Option<char> {
    let leading = match nodes.first()? {
        PatNode::Sequence(seq) => seq.first()?,
        other => other,
    };
    match leading {
        PatNode::Exactly(text) => text.chars().next(),
        _ => None,
    }
}
/// Compiles a top-level branch; delegates to `compile_branch_inner` with `add_end = true`.
fn compile_branch(&mut self) -> Result<Vec<PatNode>, String> {
self.compile_branch_inner(true)
}
/// Compiles pieces up to end of input or an unconsumed `)`, splitting on `|`.
///
/// Without any `|` the result is the plain list of pieces. With `|`, the result is a
/// single `Branch` whose entries are `Sequence`s, one per alternative, so that a
/// multi-piece alternative such as `ab*` stays together (an empty alternative becomes
/// an empty sequence, which matches the empty string). When `add_end` is true an
/// `End` node is appended in both cases.
///
/// # Errors
/// Propagates any error from `compile_piece`.
fn compile_branch_inner(&mut self, add_end: bool) -> Result<Vec<PatNode>, String> {
    let mut current: Vec<PatNode> = Vec::new();
    let mut finished: Vec<Vec<PatNode>> = Vec::new();
    loop {
        if let Some(piece) = self.compile_piece()? {
            current.push(piece);
        }
        match self.peek() {
            Some('|') => {
                self.advance();
                finished.push(std::mem::take(&mut current));
            }
            // `)` is left for the caller that opened the group.
            Some(')') | None => break,
            Some(_) => {}
        }
    }
    let mut out = if finished.is_empty() {
        current
    } else {
        finished.push(current);
        let alternatives = finished.into_iter().map(PatNode::Sequence).collect();
        vec![PatNode::Branch(alternatives, 0)]
    };
    if add_end {
        out.push(PatNode::End);
    }
    Ok(out)
}
/// Parses the body of a parenthesised group whose `(` has already been consumed and
/// then consumes the closing `)`. `missing` is the error returned when `)` is absent.
fn compile_group_body(&mut self, missing: &str) -> Result<Vec<PatNode>, String> {
    let inner = self.compile_branch_inner(false)?;
    if self.peek() != Some(')') {
        return Err(missing.to_string());
    }
    self.advance();
    Ok(inner)
}
/// Compiles one pattern piece at the cursor, including a trailing `#` / `##`
/// repetition when extended globbing is on.
///
/// Returns `Ok(None)` at end of input and in front of `)` or `|`, which are left for
/// the caller. With extended globbing, `#` binds to the single preceding literal
/// character, so `ab#` is `a` followed by zero or more `b`, not zero or more `ab`.
///
/// # Errors
/// Returns a message when a group is not closed, when `^` / `~` is not followed by a
/// pattern, or when a bracket or numeric-range sub-parser fails.
fn compile_piece(&mut self) -> Result<Option<PatNode>, String> {
    let Some(c) = self.peek() else {
        return Ok(None);
    };
    let node = match c {
        '*' => {
            self.advance();
            if self.ksh_glob && self.peek() == Some('(') {
                self.advance();
                let inner = self.compile_group_body("missing ) in *(...)")?;
                PatNode::OneHash(Box::new(PatNode::Sequence(inner)))
            } else {
                PatNode::Star
            }
        }
        '?' => {
            self.advance();
            if self.ksh_glob && self.peek() == Some('(') {
                self.advance();
                let inner = self.compile_group_body("missing ) in ?(...)")?;
                PatNode::Branch(vec![PatNode::Sequence(inner), PatNode::Nothing], 0)
            } else {
                PatNode::Any
            }
        }
        '[' => self.compile_bracket()?,
        '\\' => {
            self.advance();
            match self.advance() {
                Some(escaped) => PatNode::Exactly(escaped.to_string()),
                // A trailing backslash stands for itself.
                None => PatNode::Exactly("\\".to_string()),
            }
        }
        '#' if self.extended_glob => {
            // A `#` with nothing in front of it repeats "any character".
            self.advance();
            if self.peek() == Some('#') {
                self.advance();
                return Ok(Some(PatNode::TwoHash(Box::new(PatNode::Any))));
            }
            PatNode::OneHash(Box::new(PatNode::Any))
        }
        '<' if self.extended_glob => self.compile_numeric_range()?,
        '(' => {
            self.advance();
            self.npar += 1;
            let group_num = self.npar;
            let inner = self.compile_group_body("missing )")?;
            PatNode::Sequence(vec![
                PatNode::Open(group_num),
                PatNode::Sequence(inner),
                PatNode::Close(group_num),
            ])
        }
        ')' | '|' => return Ok(None),
        '+' if self.ksh_glob && self.peek_n(1) == Some('(') => {
            self.advance();
            self.advance();
            let inner = self.compile_group_body("missing ) in +(...)")?;
            PatNode::TwoHash(Box::new(PatNode::Sequence(inner)))
        }
        '!' if self.ksh_glob && self.peek_n(1) == Some('(') => {
            self.advance();
            self.advance();
            let inner = self.compile_group_body("missing ) in !(...)")?;
            PatNode::Exclude(inner)
        }
        '@' if self.ksh_glob && self.peek_n(1) == Some('(') => {
            self.advance();
            self.advance();
            let inner = self.compile_group_body("missing ) in @(...)")?;
            PatNode::Sequence(inner)
        }
        '^' | '~' if self.extended_glob => {
            self.advance();
            match self.compile_piece()? {
                Some(excluded) => PatNode::Exclude(vec![excluded]),
                None => return Err(format!("{} requires pattern", c)),
            }
        }
        _ => {
            let mut literal = String::new();
            while let Some(ch) = self.peek() {
                if self.is_special(ch) {
                    break;
                }
                // `#` has high precedence and applies to one character only, so a
                // character directly followed by `#` must start its own piece.
                if self.extended_glob && !literal.is_empty() && self.peek_n(1) == Some('#') {
                    break;
                }
                literal.push(ch);
                self.advance();
            }
            if literal.is_empty() {
                return Ok(None);
            }
            PatNode::Exactly(literal)
        }
    };
    if self.extended_glob && self.peek() == Some('#') {
        self.advance();
        let repeated = Box::new(node);
        if self.peek() == Some('#') {
            self.advance();
            return Ok(Some(PatNode::TwoHash(repeated)));
        }
        return Ok(Some(PatNode::OneHash(repeated)));
    }
    Ok(Some(node))
}
/// Reports whether `c`, assumed to be the character at the cursor, ends a literal run.
///
/// `+`, `!` and `@` are only special under ksh globbing and only when the character
/// after the cursor is `(`.
fn is_special(&self, c: char) -> bool {
    match c {
        '*' | '?' | '[' | '\\' | '(' | ')' | '|' => true,
        '#' | '^' | '~' | '<' => self.extended_glob,
        '+' | '!' | '@' => self.ksh_glob && self.peek_n(1) == Some('('),
        _ => false,
    }
}
/// Compiles a bracket expression starting at `[` into `AnyOf` or, when the set
/// begins with `!` or `^`, `AnyBut`.
///
/// Supported inside the brackets: a leading literal `]`, backslash escapes,
/// `[:class:]` names understood by `parse_posix_class`, and `a-z` style ranges
/// (a `-` directly before `]` is literal). Reaching end of input before `]` ends
/// the set without an error.
fn compile_bracket(&mut self) -> Result<PatNode, String> {
    // Skip the opening `[`.
    self.advance();
    let negated = match self.peek() {
        Some('!') | Some('^') => {
            self.advance();
            true
        }
        _ => false,
    };
    let mut set: Vec<char> = Vec::new();
    // A `]` in first position is a member, not the terminator.
    if let Some(']') = self.peek() {
        self.advance();
        set.push(']');
    }
    loop {
        let Some(first) = self.peek() else {
            break;
        };
        match first {
            ']' => {
                self.advance();
                break;
            }
            '\\' => {
                self.advance();
                set.extend(self.advance());
            }
            _ => {
                if first == '[' && self.peek_n(1) == Some(':') {
                    if let Some(members) = self.parse_posix_class() {
                        set.extend(members);
                        continue;
                    }
                }
                self.advance();
                let starts_range = self.peek() == Some('-') && self.peek_n(1) != Some(']');
                if starts_range {
                    self.advance();
                    if let Some(last) = self.advance() {
                        set.extend(first..=last);
                        continue;
                    }
                }
                set.push(first);
            }
        }
    }
    Ok(if negated {
        PatNode::AnyBut(set)
    } else {
        PatNode::AnyOf(set)
    })
}
/// Parses a `[:name:]` class at the cursor and returns its (ASCII) members.
///
/// Returns `None`, with the cursor restored to where it was on entry, when the text
/// is not of the form `[:name:]` or when `name` is not a known class. `print` is
/// `graph` plus the space character; `ascii` covers code points 0 through 127.
fn parse_posix_class(&mut self) -> Option<Vec<char>> {
    let start = self.pos;
    // Skip the leading `[:`.
    self.advance();
    self.advance();
    let mut class_name = String::new();
    while let Some(c) = self.peek() {
        if c == ':' {
            break;
        }
        class_name.push(c);
        self.advance();
    }
    if self.peek() != Some(':') || self.peek_n(1) != Some(']') {
        self.pos = start;
        return None;
    }
    // Skip the trailing `:]`.
    self.advance();
    self.advance();
    let chars: Vec<char> = match class_name.as_str() {
        "alpha" => ('a'..='z').chain('A'..='Z').collect(),
        "digit" => ('0'..='9').collect(),
        "alnum" => ('a'..='z').chain('A'..='Z').chain('0'..='9').collect(),
        "space" => vec![' ', '\t', '\n', '\r', '\x0b', '\x0c'],
        "upper" => ('A'..='Z').collect(),
        "lower" => ('a'..='z').collect(),
        "punct" => "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~".chars().collect(),
        "xdigit" => ('0'..='9').chain('a'..='f').chain('A'..='F').collect(),
        "blank" => vec![' ', '\t'],
        "cntrl" => (0u8..=31)
            .map(char::from)
            .chain(std::iter::once(char::from(127u8)))
            .collect(),
        "graph" => (33u8..=126).map(char::from).collect(),
        "print" => (32u8..=126).map(char::from).collect(),
        "ascii" => (0u8..=127).map(char::from).collect(),
        "word" => ('a'..='z')
            .chain('A'..='Z')
            .chain('0'..='9')
            .chain(std::iter::once('_'))
            .collect(),
        _ => {
            // Unknown name: rewind so the caller re-reads `[` as an ordinary member
            // instead of resuming after the class text.
            self.pos = start;
            return None;
        }
    };
    Some(chars)
}
/// Compiles a `<from-to>` numeric range starting at `<`.
///
/// Either bound may be omitted: both present gives `NumRng`, only the first
/// `NumFrom`, only the second `NumTo`, neither `NumAny`. A bound too large for `i64`
/// saturates to `i64::MAX` rather than being dropped.
///
/// # Errors
/// Returns a message for a character other than a digit, `-` or `>`, and for a range
/// that is never closed by `>`.
fn compile_numeric_range(&mut self) -> Result<PatNode, String> {
    // Skip the opening `<`.
    self.advance();
    let mut from_str = String::new();
    let mut to_str = String::new();
    let mut in_to = false;
    let mut closed = false;
    while let Some(c) = self.peek() {
        match c {
            '>' => {
                self.advance();
                closed = true;
                break;
            }
            '-' => {
                self.advance();
                in_to = true;
            }
            digit if digit.is_ascii_digit() => {
                if in_to {
                    to_str.push(digit);
                } else {
                    from_str.push(digit);
                }
                self.advance();
            }
            other => {
                return Err(format!("invalid character in numeric range: {}", other));
            }
        }
    }
    if !closed {
        return Err("missing > in numeric range".to_string());
    }
    // The strings hold ASCII digits only, so a parse failure can only mean overflow.
    let bound = |digits: &str| -> Option<i64> {
        if digits.is_empty() {
            None
        } else {
            Some(digits.parse().unwrap_or(i64::MAX))
        }
    };
    match (bound(&from_str), bound(&to_str)) {
        (Some(f), Some(t)) => Ok(PatNode::NumRng(f, t)),
        (Some(f), None) => Ok(PatNode::NumFrom(f)),
        (None, Some(t)) => Ok(PatNode::NumTo(t)),
        (None, None) => Ok(PatNode::NumAny),
    }
}
}
/// Runs a compiled `PatProg` against one input string.
pub struct PatMatcher<'a> {
// Program being executed.
prog: &'a PatProg,
// Text being matched.
input: &'a str,
// Byte offset of the next unmatched character in `input`.
pos: usize,
// Active glob flags; start as `prog.glob_start` and can be replaced by `GFlags` nodes.
glob_flags: GlobFlags,
// (start, end) byte offsets for capture groups 1..=NSUBEXP.
captures: [(usize, usize); NSUBEXP],
// Bit n-1 is set once group n has been opened.
captures_set: u16,
// Initialised to 0; not otherwise used by the code in this file.
errors_found: u32,
}
impl<'a> PatMatcher<'a> {
pub fn new(prog: &'a PatProg, input: &'a str) -> Self {
PatMatcher {
prog,
input,
pos: 0,
glob_flags: prog.glob_start,
captures: [(0, 0); NSUBEXP],
captures_set: 0,
errors_found: 0,
}
}
pub fn try_match(&mut self) -> bool {
if let Some(ref pure) = self.prog.pure_string {
if self.glob_flags.igncase {
return self.input.eq_ignore_ascii_case(pure);
}
return self.input == pure;
}
if self.prog.flags.nogld && self.input.starts_with('.') {
return false;
}
self.match_nodes_at(&self.prog.code.clone(), 0)
}
/// Matches `nodes[start_idx..]` one after another from the current position.
///
/// `Star` is handled here rather than in `match_node` so it can backtrack: every
/// character boundary from the current position up to the star's limit (the next
/// `/` in file mode, otherwise end of input) is tried, shortest first, until the
/// rest of the list matches. A `Star` that is the last node simply consumes up to
/// that limit. On a failed star the position is restored.
fn match_nodes_at(&mut self, nodes: &[PatNode], start_idx: usize) -> bool {
    let mut idx = start_idx;
    while idx < nodes.len() {
        let node = &nodes[idx];
        if matches!(node, PatNode::Star) {
            let save_pos = self.pos;
            let end_pos = if self.prog.flags.file {
                self.input[save_pos..]
                    .find('/')
                    .map_or(self.input.len(), |offset| save_pos + offset)
            } else {
                self.input.len()
            };
            if idx + 1 >= nodes.len() {
                self.pos = end_pos;
                return true;
            }
            for try_pos in save_pos..=end_pos {
                // Offsets inside a multi-byte character are not valid positions;
                // slicing the input there would panic.
                if !self.input.is_char_boundary(try_pos) {
                    continue;
                }
                self.pos = try_pos;
                if self.match_nodes_at(nodes, idx + 1) {
                    return true;
                }
            }
            self.pos = save_pos;
            return false;
        }
        if !self.match_node(node) {
            return false;
        }
        idx += 1;
    }
    true
}
/// Greedily repeats `inner` until it fails or stops consuming input. A failed
/// attempt leaves the position where the last successful repetition ended.
fn repeat_greedily(&mut self, inner: &PatNode) {
    loop {
        let before = self.pos;
        if !self.match_single_node(inner) {
            self.pos = before;
            break;
        }
        if self.pos == before {
            break;
        }
    }
}
/// Matches a single node at the current position, advancing `self.pos` past whatever
/// the node consumed. Returns whether the node matched.
///
/// Repetition nodes are greedy and do not backtrack. `Exclude` / `ExcludP` are
/// zero-width: they succeed when their inner nodes fail and never consume input.
fn match_node(&mut self, node: &PatNode) -> bool {
    match node {
        PatNode::End => self.pos >= self.input.len() || self.prog.flags.noanch,
        PatNode::Exactly(s) => {
            let remaining = &self.input[self.pos..];
            let matched = if self.glob_flags.igncase {
                // `get` instead of `[..s.len()]`: that byte offset may fall inside a
                // multi-byte character of the input, where direct slicing panics.
                remaining
                    .get(..s.len())
                    .map_or(false, |head| head.eq_ignore_ascii_case(s))
            } else {
                remaining.starts_with(s.as_str())
            };
            if matched {
                self.pos += s.len();
            }
            matched
        }
        PatNode::Nothing => true,
        PatNode::Any => {
            if self.pos >= self.input.len() {
                return false;
            }
            let c = self.current_char();
            if self.prog.flags.file && c == '/' {
                return false;
            }
            self.pos += c.len_utf8();
            true
        }
        PatNode::Star => {
            // Non-backtracking star: take everything up to the limit.
            let slash = if self.prog.flags.file {
                self.input[self.pos..].find('/')
            } else {
                None
            };
            self.pos = match slash {
                Some(offset) => self.pos + offset,
                None => self.input.len(),
            };
            true
        }
        PatNode::AnyOf(chars) | PatNode::AnyBut(chars) => {
            if self.pos >= self.input.len() {
                return false;
            }
            let c = self.current_char();
            let in_set = if self.glob_flags.igncase {
                chars.iter().any(|&ch| ch.eq_ignore_ascii_case(&c))
            } else {
                chars.contains(&c)
            };
            // `AnyOf` wants a member, `AnyBut` wants a non-member.
            let want_member = matches!(node, PatNode::AnyOf(_));
            if in_set == want_member {
                self.pos += c.len_utf8();
                true
            } else {
                false
            }
        }
        PatNode::Branch(alts, _) => {
            let save_pos = self.pos;
            for alt in alts {
                self.pos = save_pos;
                if self.match_node(alt) {
                    return true;
                }
            }
            self.pos = save_pos;
            false
        }
        PatNode::Sequence(nodes) => self.match_nodes_at(nodes, 0),
        PatNode::OneHash(inner) => {
            self.repeat_greedily(inner);
            true
        }
        PatNode::TwoHash(inner) => {
            if !self.match_single_node(inner) {
                return false;
            }
            self.repeat_greedily(inner);
            true
        }
        PatNode::Count { min, max, node } => {
            let mut count = 0u32;
            loop {
                if let Some(m) = max {
                    if count >= *m {
                        break;
                    }
                }
                let save_pos = self.pos;
                if !self.match_node(node) {
                    self.pos = save_pos;
                    break;
                }
                // A repetition that consumed nothing would loop forever.
                if self.pos == save_pos {
                    break;
                }
                count += 1;
            }
            count >= *min
        }
        PatNode::Open(n) => {
            if *n > 0 && *n <= NSUBEXP {
                self.captures[n - 1].0 = self.pos;
                self.captures_set |= 1 << (n - 1);
            }
            true
        }
        PatNode::Close(n) => {
            if *n > 0 && *n <= NSUBEXP {
                self.captures[n - 1].1 = self.pos;
            }
            true
        }
        PatNode::NumRng(from, to) => self.match_number(Some(*from), Some(*to)),
        PatNode::NumFrom(from) => self.match_number(Some(*from), None),
        PatNode::NumTo(to) => self.match_number(None, Some(*to)),
        PatNode::NumAny => self.match_number(None, None),
        PatNode::IsStart => self.pos == 0,
        PatNode::IsEnd => self.pos >= self.input.len(),
        PatNode::GFlags(flags) => {
            self.glob_flags = *flags;
            true
        }
        PatNode::Exclude(inner) | PatNode::ExcludP(inner) => {
            let save_pos = self.pos;
            let matched = self.match_nodes_at(inner, 0);
            self.pos = save_pos;
            !matched
        }
        PatNode::WBranch(alts) => {
            let save_pos = self.pos;
            for alt in alts {
                self.pos = save_pos;
                if self.match_node(alt) && self.pos > save_pos {
                    return true;
                }
            }
            self.pos = save_pos;
            false
        }
        PatNode::ExcSync | PatNode::ExcEnd | PatNode::Back(_) | PatNode::CountStart => true,
    }
}
/// Returns the character at the current position, or `'\0'` at end of input.
fn current_char(&self) -> char {
    match self.input[self.pos..].chars().next() {
        Some(ch) => ch,
        None => '\0',
    }
}
/// Matches one node, running a `Sequence` through `match_nodes_at` and anything else
/// through `match_node`.
fn match_single_node(&mut self, node: &PatNode) -> bool {
    if let PatNode::Sequence(children) = node {
        self.match_nodes_at(children, 0)
    } else {
        self.match_node(node)
    }
}
/// Consumes a run of ASCII digits at the current position and checks the value
/// against the optional inclusive bounds.
///
/// Fails, leaving the position unchanged, when there are no digits, when the digits
/// do not fit in an `i64`, or when the value is outside the bounds.
fn match_number(&mut self, from: Option<i64>, to: Option<i64>) -> bool {
    let origin = self.pos;
    while self.pos < self.input.len() && self.current_char().is_ascii_digit() {
        // ASCII digits are one byte each.
        self.pos += 1;
    }
    let digits = &self.input[origin..self.pos];
    let accepted = match digits.parse::<i64>() {
        Ok(value) => {
            let above_floor = from.map_or(true, |lo| value >= lo);
            let below_ceiling = to.map_or(true, |hi| value <= hi);
            above_floor && below_ceiling
        }
        Err(_) => false,
    };
    if !accepted {
        self.pos = origin;
    }
    accepted
}
/// Returns the raw (start, end) byte offsets for all capture slots, whether or not
/// the corresponding group was opened during matching.
pub fn captures(&self) -> &[(usize, usize); NSUBEXP] {
&self.captures
}
/// Returns the text captured by group `n` (1-based).
///
/// Yields `None` when `n` is 0 or above `NSUBEXP`, when the group was never opened,
/// or when the recorded offsets do not describe a forward range inside the input.
pub fn capture(&self, n: usize) -> Option<&'a str> {
    if !(1..=NSUBEXP).contains(&n) {
        return None;
    }
    let slot = n - 1;
    let opened = self.captures_set & (1 << slot) != 0;
    if !opened {
        return None;
    }
    let text: &'a str = self.input;
    let (start, end) = self.captures[slot];
    if start > end || end > text.len() {
        return None;
    }
    Some(&text[start..end])
}
}
/// Compiles `pattern` with the compiler's defaults (extended and ksh globbing both enabled).
///
/// Returns the compiler's error message on failure.
pub fn patcompile(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
PatCompiler::new(pattern, flags).compile()
}
/// Compiles `pattern` with explicit extended-glob, ksh-glob and case-insensitivity settings.
///
/// Returns the compiler's error message on failure.
pub fn patcompile_opts(
    pattern: &str,
    flags: PatFlags,
    extended_glob: bool,
    ksh_glob: bool,
    igncase: bool,
) -> Result<PatProg, String> {
    let compiler = PatCompiler::new(pattern, flags);
    let compiler = compiler.with_options(extended_glob, ksh_glob);
    let compiler = compiler.with_igncase(igncase);
    compiler.compile()
}
/// Runs `prog` against `s` with a fresh matcher and reports whether it matched.
pub fn pattry(prog: &PatProg, s: &str) -> bool {
PatMatcher::new(prog, s).try_match()
}
/// Compiles `pattern` with default flags and matches it against `text`.
///
/// A pattern that fails to compile counts as "no match".
pub fn patmatch(pattern: &str, text: &str) -> bool {
    let compiled = patcompile(pattern, PatFlags::default());
    compiled.map(|prog| pattry(&prog, text)).unwrap_or(false)
}
/// Like `patmatch`, but with explicit extended-glob, ksh-glob and case-insensitivity
/// settings. A pattern that fails to compile counts as "no match".
pub fn patmatch_opts(
    pattern: &str,
    text: &str,
    extended_glob: bool,
    ksh_glob: bool,
    igncase: bool,
) -> bool {
    let flags = PatFlags::default();
    let compiled = patcompile_opts(pattern, flags, extended_glob, ksh_glob, igncase);
    if let Ok(prog) = compiled {
        pattry(&prog, text)
    } else {
        false
    }
}
/// Matches `prog` against `text` and, on success, returns one entry per compiled
/// group (`prog.npar` entries, group 1 first). An entry is `None` when that group
/// has no recorded capture. Returns `None` when the match fails.
pub fn patmatch_captures<'a>(prog: &'a PatProg, text: &'a str) -> Option<Vec<Option<&'a str>>> {
    let mut matcher = PatMatcher::new(prog, text);
    if !matcher.try_match() {
        return None;
    }
    let groups = (1..=prog.npar).map(|group| matcher.capture(group)).collect();
    Some(groups)
}
/// Matches `prog` against at most the first `len` bytes of `s`.
///
/// When `len` falls inside a multi-byte character, the prefix is shortened to the
/// previous character boundary instead of panicking. A `len` at or beyond the end of
/// `s` uses the whole string.
pub fn pattrylen(prog: &PatProg, s: &str, len: usize) -> bool {
    let mut end = len.min(s.len());
    // Offset 0 is always a boundary, so this terminates.
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    pattry(prog, &s[..end])
}
/// Matches `prog` against `s` and returns the match result together with the raw
/// (start, end) byte offsets of each compiled group.
///
/// The result is always `Some`. On success the vector has `prog.npar` entries;
/// groups beyond the `NSUBEXP` slots the matcher tracks are reported as `(0, 0)`
/// rather than indexing out of bounds. On failure the vector is empty.
pub fn pattryrefs(prog: &PatProg, s: &str) -> Option<(bool, Vec<(usize, usize)>)> {
    let mut matcher = PatMatcher::new(prog, s);
    if !matcher.try_match() {
        return Some((false, Vec::new()));
    }
    let refs: Vec<(usize, usize)> = (0..prog.npar)
        .map(|slot| matcher.captures.get(slot).copied().unwrap_or((0, 0)))
        .collect();
    Some((true, refs))
}
/// Matches `prog` against `s` and returns the matcher's final byte position on
/// success, or `None` when the match fails.
pub fn patmatchlen(prog: &PatProg, s: &str) -> Option<usize> {
    let mut matcher = PatMatcher::new(prog, s);
    let matched = matcher.try_match();
    if matched {
        Some(matcher.pos)
    } else {
        None
    }
}
/// Parses a `(#...)` globbing-flag group at the start of `s`.
///
/// On success returns the parsed flags, an optional assertion (`IsStart` for `s`,
/// `IsEnd` for `e`) and the number of bytes consumed including the closing `)`.
///
/// Recognised letters: `i` / `l` / `I` (case modes), `b` / `B`, `m` / `M`, `u` / `U`,
/// `a<digits>` (approximation count: 1 when no digits follow, clamped to 254), `q`
/// (everything up to `)` is skipped), `s` and `e`.
///
/// Returns `None` when `s` does not start with `(#`, when an unknown letter is met,
/// when the closing `)` is missing, or when `s` / `e` is combined with anything else.
pub fn patgetglobflags(s: &str) -> Option<(GlobFlags, Option<PatOp>, usize)> {
    if !s.starts_with("(#") {
        return None;
    }
    let mut flags = GlobFlags::default();
    let mut assert_op = None;
    let mut pos = 2;
    let bytes = s.as_bytes();
    while pos < bytes.len() && bytes[pos] != b')' {
        match bytes[pos] {
            b'q' => {
                while pos < bytes.len() && bytes[pos] != b')' {
                    pos += 1;
                }
                break;
            }
            b'a' => {
                pos += 1;
                let digits_start = pos;
                while pos < bytes.len() && bytes[pos].is_ascii_digit() {
                    pos += 1;
                }
                // Both offsets sit next to ASCII bytes, so this slice is on char boundaries.
                let digits = &s[digits_start..pos];
                flags.approx = if digits.is_empty() {
                    1
                } else {
                    // Parse wide and then clamp: parsing straight into `u8` would
                    // fail for anything above 255 and fall back to 1. A value too
                    // large even for `u32` is clamped as well.
                    let clamped = digits.parse::<u32>().map_or(254, |n| n.min(254));
                    u8::try_from(clamped).unwrap_or(254)
                };
                // `pos` already points past the digits.
                continue;
            }
            b'l' => {
                flags.lcmatchuc = true;
                flags.igncase = false;
            }
            b'i' => {
                flags.igncase = true;
                flags.lcmatchuc = false;
            }
            b'I' => {
                flags.igncase = false;
                flags.lcmatchuc = false;
            }
            b'b' => {
                flags.backref = true;
            }
            b'B' => {
                flags.backref = false;
            }
            b'm' => {
                flags.matchref = true;
            }
            b'M' => {
                flags.matchref = false;
            }
            b's' => {
                assert_op = Some(PatOp::IsStart);
            }
            b'e' => {
                assert_op = Some(PatOp::IsEnd);
            }
            b'u' => {
                flags.multibyte = true;
            }
            b'U' => {
                flags.multibyte = false;
            }
            _ => return None,
        }
        pos += 1;
    }
    if pos >= bytes.len() || bytes[pos] != b')' {
        return None;
    }
    pos += 1;
    // `pos - 3` is the number of flag bytes between `(#` and `)`; an assertion must
    // be the only one.
    if assert_op.is_some() && pos - 3 > 1 {
        return None;
    }
    Some((flags, assert_op, pos))
}
/// Reports whether `ch` is a member of `range`, optionally ignoring ASCII case.
pub fn patmatchrange(range: &[char], ch: char, igncase: bool) -> bool {
    if igncase {
        let wanted = ch.to_ascii_lowercase();
        range
            .iter()
            .any(|member| member.to_ascii_lowercase() == wanted)
    } else {
        range.contains(&ch)
    }
}
/// Returns the character at index `idx` of `range`, or `None` when `idx` is out of bounds.
pub fn patmatchindex(range: &[char], idx: usize) -> Option<char> {
    match range.get(idx) {
        Some(&member) => Some(member),
        None => None,
    }
}
/// Reports whether `s` contains any of the characters `*`, `?`, `[`, `#`, `^`, `~`,
/// `<` or `>`.
pub fn haswilds(s: &str) -> bool {
    const WILDS: [char; 8] = ['*', '?', '[', '#', '^', '~', '<', '>'];
    s.chars().any(|c| WILDS.contains(&c))
}
/// Counts how many times in a row the program's node list matches, starting at the
/// beginning of `s` and continuing from where the previous repetition ended.
///
/// Stops at `max` repetitions when given, at the first failed repetition, or at a
/// repetition that consumed no input.
pub fn patrepeat(prog: &PatProg, s: &str, max: Option<usize>) -> usize {
    let mut matcher = PatMatcher::new(prog, s);
    let mut repeats: usize = 0;
    while max.map_or(true, |limit| repeats < limit) {
        let before = matcher.pos;
        let matched = matcher.match_nodes_at(&prog.code, 0);
        if !matched {
            matcher.pos = before;
            break;
        }
        if matcher.pos == before {
            break;
        }
        repeats += 1;
    }
    repeats
}
/// One entry of the pattern-scope stack managed by `startpatternscope` / `endpatternscope`.
#[derive(Debug, Default, Clone)]
pub struct PatternScope {
/// List saved and restored by `savepatterndisables` / `restorepatterndisables`
/// (presumably names of disabled pattern features; nothing in this file interprets it).
pub disabled: Vec<String>,
}
use std::sync::Mutex;
// Process-wide stack of pattern scopes; the innermost scope is the last element.
static PATTERN_SCOPES: Mutex<Vec<PatternScope>> = Mutex::new(Vec::new());
/// Pushes a fresh, empty scope onto the pattern-scope stack.
///
/// # Panics
/// Panics if the scope mutex is poisoned.
pub fn startpatternscope() {
    let mut scopes = PATTERN_SCOPES.lock().unwrap();
    scopes.push(PatternScope::default());
}
/// Pops the innermost pattern scope; does nothing when the stack is empty.
///
/// # Panics
/// Panics if the scope mutex is poisoned.
pub fn endpatternscope() {
    let mut scopes = PATTERN_SCOPES.lock().unwrap();
    let _discarded = scopes.pop();
}
/// Returns a copy of the innermost scope's `disabled` list, or an empty list when
/// no scope is active.
///
/// # Panics
/// Panics if the scope mutex is poisoned.
pub fn savepatterndisables() -> Vec<String> {
    let scopes = PATTERN_SCOPES.lock().unwrap();
    match scopes.last() {
        Some(scope) => scope.disabled.clone(),
        None => Vec::new(),
    }
}
/// Replaces the innermost scope's `disabled` list with `disables`; does nothing when
/// no scope is active.
///
/// # Panics
/// Panics if the scope mutex is poisoned.
pub fn restorepatterndisables(disables: Vec<String>) {
    let mut scopes = PATTERN_SCOPES.lock().unwrap();
    match scopes.last_mut() {
        Some(innermost) => innermost.disabled = disables,
        None => {}
    }
}
/// Empties the innermost scope's `disabled` list; does nothing when no scope is active.
///
/// # Panics
/// Panics if the scope mutex is poisoned.
pub fn clearpatterndisables() {
    let mut scopes = PATTERN_SCOPES.lock().unwrap();
    match scopes.last_mut() {
        Some(innermost) => innermost.disabled.clear(),
        None => {}
    }
}
/// Takes ownership of a program and drops it; the body is intentionally empty.
pub fn freepatprog(_prog: PatProg) {
}
/// Placeholder: ignores all of its arguments and always returns 0.
pub fn pat_enables(cmd: &str, patterns: &[&str], enable: bool) -> i32 {
    let _ = cmd;
    let _ = patterns;
    let _ = enable;
    0
}
/// Names accepted by `range_type` and rendered by `pattern_range_to_string` as
/// `[:name:]`; an entry's position in this table is its index for both functions.
pub const COLON_CLASSES: &[&str] = &[
"alpha",
"alnum",
"ascii",
"blank",
"cntrl",
"digit",
"graph",
"lower",
"print",
"punct",
"space",
"upper",
"xdigit",
"IDENT",
"IFS",
"IFSSPACE",
"WORD",
"INCOMPLETE",
"INVALID",
];
/// Returns the index of `name` in `COLON_CLASSES` (exact, case-sensitive match), or
/// `None` when it is not listed.
pub fn range_type(name: &str) -> Option<usize> {
    for (idx, class) in COLON_CLASSES.iter().enumerate() {
        if *class == name {
            return Some(idx);
        }
    }
    None
}
/// Renders the class at `range_type_idx` in `COLON_CLASSES` as `[:name:]`, or returns
/// `None` when the index is out of range.
pub fn pattern_range_to_string(range_type_idx: usize) -> Option<String> {
    let class = COLON_CLASSES.get(range_type_idx)?;
    Some(format!("[:{}:]", class))
}
/// No-op placeholder.
pub fn clear_shiftstate() {}
/// Returns the byte offset just past the character that starts at `pos` in `s`.
///
/// When there is no character to step over (`pos` is at or beyond the end of `s`, or
/// not on a character boundary) the offset advances by one byte instead of panicking.
pub fn metacharinc(s: &str, pos: usize) -> usize {
    let width = s
        .get(pos..)
        .and_then(|rest| rest.chars().next())
        .map_or(1, char::len_utf8);
    pos + width
}
/// Appends `node` to the end of `prog`.
pub fn patadd(prog: &mut Vec<PatNode>, node: PatNode) {
prog.push(node);
}
/// No-op placeholder.
pub fn patcompcharsset() {}
/// No-op placeholder.
pub fn patcompstart() {}
/// Thin wrapper: compiles the whole `pattern` via `patcompile`.
pub fn patcompswitch(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
patcompile(pattern, flags)
}
/// Thin wrapper: compiles the whole `pattern` via `patcompile`.
pub fn patcompbranch(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
patcompile(pattern, flags)
}
/// Thin wrapper: compiles the whole `pattern` via `patcompile`.
pub fn patcomppiece(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
patcompile(pattern, flags)
}
/// Compiles the negation of `pattern` by wrapping it as `^(pattern)` and handing the
/// result to `patcompile`.
pub fn patcompnot(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
    let mut negated = String::with_capacity(pattern.len() + 3);
    negated.push_str("^(");
    negated.push_str(pattern);
    negated.push(')');
    patcompile(&negated, flags)
}
/// Appends `node` to `prog` and returns the index at which it was stored.
pub fn patnode(prog: &mut Vec<PatNode>, node: PatNode) -> usize {
    prog.push(node);
    prog.len() - 1
}
/// Inserts `node` at index `pos`, shifting later nodes; a `pos` past the end of
/// `prog` is silently ignored.
pub fn patinsert(prog: &mut Vec<PatNode>, pos: usize, node: PatNode) {
    if pos > prog.len() {
        return;
    }
    prog.insert(pos, node);
}
/// No-op placeholder; all arguments are ignored.
pub fn pattail(_prog: &[PatNode], _p: usize, _val: usize) {}
/// No-op placeholder; all arguments are ignored.
pub fn patoptail(_prog: &[PatNode], _p: usize, _val: usize) {}
/// Returns the character that starts at byte offset `pos` in `s`.
///
/// Yields `None` at end of string, and also (instead of panicking) when `pos` is
/// beyond the end or not on a character boundary.
pub fn charref(s: &str, pos: usize) -> Option<char> {
    s.get(pos..)?.chars().next()
}
/// Thin wrapper around `metacharinc`: the byte offset just past the character at `pos`.
pub fn charnext(s: &str, pos: usize) -> usize {
metacharinc(s, pos)
}
/// Returns the character that starts at `*pos` and advances `*pos` past it.
///
/// Yields `None` and leaves `*pos` untouched at end of string, and also (instead of
/// panicking) when `*pos` is beyond the end or not on a character boundary.
pub fn charrefinc(s: &str, pos: &mut usize) -> Option<char> {
    let c = s.get(*pos..)?.chars().next()?;
    *pos += c.len_utf8();
    Some(c)
}
/// Returns the byte offset of the character that ends at `pos` in `s`; offset 0
/// maps to 0.
///
/// When `pos` is beyond the end of `s` or not on a character boundary, the offset
/// steps back by one byte instead of panicking.
pub fn charsub(s: &str, pos: usize) -> usize {
    if pos == 0 {
        return 0;
    }
    let width = s
        .get(..pos)
        .and_then(|head| head.chars().next_back())
        .map_or(1, char::len_utf8);
    pos - width
}
/// No-op placeholder.
pub fn pattrystart() {}
/// Returns an owned, unmodified copy of `s`.
pub fn patmungestring(s: &str) -> String {
    String::from(s)
}
/// Thin wrapper: forwards to `patmatchrange` unchanged.
pub fn mb_patmatchrange(range: &[char], ch: char, igncase: bool) -> bool {
patmatchrange(range, ch, igncase)
}
/// Thin wrapper: forwards to `patmatchindex` unchanged.
pub fn mb_patmatchindex(range: &[char], idx: usize) -> Option<char> {
patmatchindex(range, idx)
}
/// Returns an owned copy of `s`.
pub fn patallocstr(s: &str) -> String {
    s.to_owned()
}
// Unit tests for the public matching API and the helper functions.
#[cfg(test)]
mod tests {
use super::*;
// Pure-literal patterns must match the whole input exactly.
#[test]
fn test_simple_literal() {
assert!(patmatch("hello", "hello"));
assert!(!patmatch("hello", "world"));
assert!(!patmatch("hello", "hell"));
}
// `*` matches any run of characters, including the empty one.
#[test]
fn test_star() {
assert!(patmatch("*", "anything"));
assert!(patmatch("*", ""));
assert!(patmatch("h*o", "hello"));
assert!(patmatch("h*o", "ho"));
assert!(!patmatch("h*o", "hi"));
}
// `?` matches exactly one character.
#[test]
fn test_question() {
assert!(patmatch("?", "a"));
assert!(!patmatch("?", "ab"));
assert!(patmatch("h?llo", "hello"));
assert!(patmatch("h?llo", "hallo"));
assert!(!patmatch("h?llo", "hllo"));
}
// Bracket sets and ranges.
#[test]
fn test_bracket() {
assert!(patmatch("[abc]", "a"));
assert!(patmatch("[abc]", "b"));
assert!(!patmatch("[abc]", "d"));
assert!(patmatch("[a-z]", "m"));
assert!(!patmatch("[a-z]", "5"));
}
// Both `!` and `^` negate a bracket set.
#[test]
fn test_bracket_negated() {
assert!(!patmatch("[!abc]", "a"));
assert!(patmatch("[!abc]", "d"));
assert!(patmatch("[^abc]", "x"));
}
// A backslash makes the next character literal.
#[test]
fn test_escape() {
assert!(patmatch("\\*", "*"));
assert!(!patmatch("\\*", "a"));
assert!(patmatch("\\?", "?"));
}
// `<from-to>` bounds are inclusive.
#[test]
fn test_numeric_range() {
assert!(patmatch("<1-10>", "5"));
assert!(patmatch("<1-10>", "1"));
assert!(patmatch("<1-10>", "10"));
assert!(!patmatch("<1-10>", "0"));
assert!(!patmatch("<1-10>", "11"));
}
// The last argument of `patmatch_opts` toggles case-insensitive matching.
#[test]
fn test_case_insensitive() {
assert!(patmatch_opts("Hello", "HELLO", true, true, true));
assert!(patmatch_opts("Hello", "hello", true, true, true));
assert!(!patmatch_opts("Hello", "HELLO", true, true, false));
}
// `x#` matches zero or more occurrences of `x`.
#[test]
fn test_extended_hash() {
assert!(patmatch("a#", ""));
assert!(patmatch("a#", "a"));
assert!(patmatch("a#", "aaa"));
}
// Parenthesised groups are numbered from 1 in order of appearance.
#[test]
fn test_captures() {
let prog = patcompile("(foo)(bar)", PatFlags::default()).unwrap();
let captures = patmatch_captures(&prog, "foobar").unwrap();
assert_eq!(captures.len(), 2);
assert_eq!(captures[0], Some("foo"));
assert_eq!(captures[1], Some("bar"));
}
// `[:name:]` classes inside brackets.
#[test]
fn test_posix_class() {
assert!(patmatch("[[:alpha:]]", "a"));
assert!(patmatch("[[:alpha:]]", "Z"));
assert!(!patmatch("[[:alpha:]]", "5"));
assert!(patmatch("[[:digit:]]", "5"));
assert!(!patmatch("[[:digit:]]", "a"));
}
// A pattern without pattern characters compiles to the pure-string fast path.
#[test]
fn test_pure_string_optimization() {
let prog = patcompile("hello", PatFlags::default()).unwrap();
assert!(prog.flags.pures);
assert!(prog.pure_string.is_some());
}
// `+(...)` requires at least one repetition.
#[test]
fn test_ksh_glob_plus() {
assert!(patmatch("+(ab)", "ab"));
assert!(patmatch("+(ab)", "abab"));
assert!(!patmatch("+(ab)", ""));
}
// `*(...)` allows zero or more repetitions.
#[test]
fn test_ksh_glob_star() {
assert!(patmatch("*(ab)", ""));
assert!(patmatch("*(ab)", "ab"));
assert!(patmatch("*(ab)", "ababab"));
}
// `?(...)` makes the group optional.
#[test]
fn test_ksh_glob_question() {
assert!(patmatch("?(ab)c", "c"));
assert!(patmatch("?(ab)c", "abc"));
}
// `pattrylen` matches against a prefix of the input only.
#[test]
fn test_pattrylen() {
let prog = patcompile("hello", PatFlags::default()).unwrap();
assert!(pattrylen(&prog, "hello world", 5));
assert!(!pattrylen(&prog, "hello world", 3));
}
// With `noanch` the pattern may match without consuming the whole input.
#[test]
fn test_patmatchlen() {
let prog = patcompile(
"hel*",
PatFlags {
noanch: true,
..Default::default()
},
)
.unwrap();
let len = patmatchlen(&prog, "hello world");
assert!(len.is_some());
}
// `(#...)` flag groups: flags parsed, assertion reported, bytes consumed.
#[test]
fn test_patgetglobflags() {
let (flags, assert_op, consumed) = patgetglobflags("(#i)rest").unwrap();
assert!(flags.igncase);
assert!(assert_op.is_none());
assert_eq!(consumed, 4);
let (flags, _, _) = patgetglobflags("(#l)rest").unwrap();
assert!(flags.lcmatchuc);
assert!(!flags.igncase);
let (_, assert_op, _) = patgetglobflags("(#s)rest").unwrap();
assert_eq!(assert_op, Some(PatOp::IsStart));
let (flags, _, _) = patgetglobflags("(#bm)rest").unwrap();
assert!(flags.backref);
assert!(flags.matchref);
}
#[test]
fn test_haswilds() {
assert!(haswilds("*.txt"));
assert!(haswilds("file?"));
assert!(haswilds("[abc]"));
assert!(haswilds("foo#"));
assert!(!haswilds("plain"));
}
#[test]
fn test_patmatchrange() {
let range = vec!['a', 'b', 'c'];
assert!(patmatchrange(&range, 'a', false));
assert!(!patmatchrange(&range, 'd', false));
assert!(patmatchrange(&range, 'A', true));
}
// Indices are positions in `COLON_CLASSES`.
#[test]
fn test_range_type() {
assert_eq!(range_type("alpha"), Some(0));
assert_eq!(range_type("digit"), Some(5));
assert_eq!(range_type("nonexistent"), None);
}
#[test]
fn test_pattern_range_to_string() {
assert_eq!(pattern_range_to_string(0), Some("[:alpha:]".to_string()));
assert_eq!(pattern_range_to_string(5), Some("[:digit:]".to_string()));
}
}