mod compile;
mod parser;
mod vm;
#[cfg(test)]
mod tests;
use alloc::string::String;
use alloc::vec::Vec;
pub use parser::RegexError;
/// A compiled regular expression.
///
/// Created by [`Regex::new`]; bundles the program executed by the `vm`
/// module with the capture-group metadata and the parsed [`Flags`].
pub struct Regex {
    /// Instruction list produced by `compile::compile` and run by `vm::run`.
    prog: Vec<vm::Inst>,
    /// Group count reported by `compile::compile`; forwarded to `vm::run`.
    group_count: usize,
    /// Pairs returned by the parser — presumably `(group index, group name)`.
    group_names: Vec<(usize, String)>,
    /// Flags parsed from the flag string supplied at construction time.
    flags: Flags,
}
/// Boolean options attached to a compiled regular expression.
///
/// Each field corresponds to one flag character accepted by
/// [`Flags::parse`]. The type is plain data, so it derives the common
/// comparison and formatting traits in addition to `Clone`/`Copy`/`Default`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Flags {
    /// Set by the `i` flag character; interpreted by the matching engine.
    pub ignore_case: bool,
    /// Set by the `g` flag character; makes `Regex::replace` substitute
    /// every match instead of stopping after the first one.
    pub global: bool,
    /// Set by the `m` flag character; interpreted by the matching engine.
    pub multiline: bool,
    /// Set by the `s` flag character; interpreted by the matching engine.
    pub dotall: bool,
    /// Set by the `y` flag character; a search then only tries the exact
    /// start position instead of scanning forward.
    pub sticky: bool,
}
impl Flags {
    /// Parses a JavaScript-style flag string such as `"gi"`.
    ///
    /// Recognised characters are `i`, `g`, `m`, `s` and `y`, each of which
    /// sets the corresponding field. `u` and `d` are accepted but have no
    /// effect on the returned value.
    ///
    /// # Errors
    ///
    /// Returns a [`RegexError`] when `s` contains a character that is not a
    /// known flag, or when the same flag appears more than once.
    pub fn parse(s: &str) -> Result<Flags, RegexError> {
        let mut f = Flags::default();
        for (at, c) in s.char_indices() {
            // ECMAScript treats a repeated flag as a syntax error. The scan
            // stops at the first bad character, so the prefix checked here
            // never holds more than the handful of distinct valid flags.
            if s[..at].contains(c) {
                return Err(RegexError::new(alloc::format!("duplicate flag `{c}`")));
            }
            match c {
                'i' => f.ignore_case = true,
                'g' => f.global = true,
                'm' => f.multiline = true,
                's' => f.dotall = true,
                'y' => f.sticky = true,
                // Accepted for compatibility; nothing is recorded for them.
                'u' | 'd' => {}
                other => {
                    return Err(RegexError::new(alloc::format!("unknown flag `{other}`")));
                }
            }
        }
        Ok(f)
    }
}
/// Capture spans recorded for one successful match.
///
/// Every span is a half-open `(start, end)` pair of indices into the
/// `char` sequence of the searched text. Slot 0 describes the whole match;
/// a slot holding `None` has no span recorded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Captures {
    /// One entry per group, with the whole match at index 0.
    pub groups: Vec<Option<(usize, usize)>>,
}

impl Captures {
    /// Returns the span of the whole match.
    ///
    /// # Panics
    ///
    /// Panics if `groups` is empty or if slot 0 is `None`; neither is
    /// expected for a value that describes a successful match.
    #[must_use]
    pub fn whole(&self) -> (usize, usize) {
        let overall = self.groups[0];
        overall.expect("group 0 is always set on a successful match")
    }

    /// Returns the span stored for group `i`.
    ///
    /// Yields `None` both when `i` is out of range and when the slot itself
    /// holds `None`.
    #[must_use]
    pub fn group(&self, i: usize) -> Option<(usize, usize)> {
        match self.groups.get(i) {
            Some(&span) => span,
            None => None,
        }
    }
}
impl Regex {
    /// Compiles `pattern` using the flag string `flags`.
    ///
    /// The flag string is parsed first, then the pattern is parsed and the
    /// resulting syntax tree is compiled into a program.
    ///
    /// # Errors
    ///
    /// Returns the [`RegexError`] produced by whichever of those three steps
    /// fails first.
    pub fn new(pattern: &str, flags: &str) -> Result<Regex, RegexError> {
        let flags = Flags::parse(flags)?;
        let (ast, _, group_names) = parser::parse(pattern)?;
        let (prog, group_count) = compile::compile(&ast, &group_names)?;
        Ok(Regex {
            prog,
            group_count,
            group_names,
            flags,
        })
    }

    /// Returns the group count reported by the compiler.
    #[must_use]
    pub fn group_count(&self) -> usize {
        self.group_count
    }

    /// Returns the group-name pairs produced by the parser.
    #[must_use]
    pub fn group_names(&self) -> &[(usize, String)] {
        &self.group_names
    }

    /// Returns a copy of the flags this expression was built with.
    #[must_use]
    pub fn flags(&self) -> Flags {
        self.flags
    }

    /// Reports whether the expression matches `text`.
    ///
    /// The search starts at position 0; with the sticky flag set, only
    /// position 0 itself is tried.
    #[must_use]
    pub fn is_match(&self, text: &str) -> bool {
        self.captures_from(text, 0).is_some()
    }

    /// Searches `text` starting at `start` and returns the capture spans of
    /// the first match.
    ///
    /// `start` and every returned span count `char`s of `text`, not bytes.
    /// Returns `None` when nothing matches or when `start` lies beyond the
    /// end of the text.
    #[must_use]
    pub fn captures_from(&self, text: &str, start: usize) -> Option<Captures> {
        let chars: Vec<char> = text.chars().collect();
        self.captures_at(&chars, start)
    }

    /// Like [`Regex::captures_from`], but returns only the span of the whole
    /// match.
    #[must_use]
    pub fn find_from(&self, text: &str, start: usize) -> Option<(usize, usize)> {
        self.captures_from(text, start).map(|c| c.whole())
    }

    /// Runs the program at successive start positions until one matches.
    ///
    /// With the sticky flag only `start` is tried; otherwise every position
    /// from `start` up to and including `chars.len()` is tried in order.
    fn captures_at(&self, chars: &[char], start: usize) -> Option<Captures> {
        // A start position past the end can never match. The non-sticky
        // range below is already empty in that case; checking up front makes
        // sticky mode agree and keeps an out-of-range index away from the VM.
        if start > chars.len() {
            return None;
        }
        let last = if self.flags.sticky { start } else { chars.len() };
        (start..=last).find_map(|at| {
            vm::run(&self.prog, chars, at, self.group_count, self.flags)
                .map(|groups| Captures { groups })
        })
    }

    /// Returns `text` with matches replaced by the expansion of
    /// `replacement`.
    ///
    /// Only the first match is replaced unless the global flag is set, in
    /// which case the search resumes after each match until none is found.
    /// The template is expanded by `expand_replacement` for every match.
    #[must_use]
    pub fn replace(&self, text: &str, replacement: &str) -> String {
        let chars: Vec<char> = text.chars().collect();
        let mut out = String::with_capacity(text.len());
        let mut pos = 0;
        while pos <= chars.len() {
            let Some(caps) = self.captures_at(&chars, pos) else {
                break;
            };
            let (ms, me) = caps.whole();
            // Copy the unmatched text between the previous position and
            // this match, then the expanded replacement.
            out.extend(&chars[pos..ms]);
            expand_replacement(replacement, &chars, &caps, &mut out);
            if me > ms {
                pos = me;
            } else {
                // Empty match: step over one character (copying it through)
                // so the next search cannot find the same empty match again.
                if me < chars.len() {
                    out.push(chars[me]);
                }
                pos = me + 1;
            }
            if !self.flags.global {
                break;
            }
        }
        // `pos` may be one past the end after an empty match at the end.
        out.extend(&chars[pos.min(chars.len())..]);
        out
    }
}
/// Appends the expansion of a replacement `template` to `out`.
///
/// `chars` is the searched text and `caps` the capture spans of the match
/// being replaced. The following substitution patterns are recognised:
///
/// * `$$` — a literal `$`
/// * `$&` — the matched text
/// * `` $` `` — the text before the match
/// * `$'` — the text after the match
/// * `$n` / `$nn` — capture group `n` (1-based). A two-digit number is used
///   when such a group exists; otherwise the first digit alone is tried. A
///   group that exists but has no span expands to nothing, while a number
///   that names no group at all is copied literally.
///
/// Everything else, including a trailing `$` and `$<name>` (no name table
/// is available here), is copied through unchanged.
fn expand_replacement(template: &str, chars: &[char], caps: &Captures, out: &mut String) {
    let t: Vec<char> = template.chars().collect();
    // Number of real capture groups; slot 0 is the whole match.
    let group_total = caps.groups.len().saturating_sub(1);
    let mut i = 0;
    while i < t.len() {
        if t[i] != '$' || i + 1 == t.len() {
            out.push(t[i]);
            i += 1;
            continue;
        }
        match t[i + 1] {
            '$' => {
                out.push('$');
                i += 2;
            }
            '&' => {
                let (s, e) = caps.whole();
                out.extend(&chars[s..e]);
                i += 2;
            }
            '`' => {
                let (s, _) = caps.whole();
                out.extend(&chars[..s]);
                i += 2;
            }
            '\'' => {
                let (_, e) = caps.whole();
                out.extend(&chars[e..]);
                i += 2;
            }
            d @ '0'..='9' => {
                let one = d as usize - '0' as usize;
                let two = t
                    .get(i + 2)
                    .and_then(|c| c.to_digit(10))
                    .map(|low| one * 10 + low as usize);
                // Prefer the two-digit reading when it names a real group.
                let (index, width) = match two {
                    Some(n) if (1..=group_total).contains(&n) => (n, 2),
                    _ => (one, 1),
                };
                if (1..=group_total).contains(&index) {
                    if let Some((s, e)) = caps.group(index) {
                        out.extend(&chars[s..e]);
                    }
                    i += 1 + width;
                } else {
                    // Not a group reference: keep the `$`; the digits that
                    // follow are copied by the plain-character path.
                    out.push('$');
                    i += 1;
                }
            }
            _ => {
                out.push('$');
                i += 1;
            }
        }
    }
}