use std::collections::HashMap;
use crate::ast::Node;
use crate::error::Result;
use crate::flags::Flags;
use crate::match_obj::{FindIter, GroupMatch, Match, MatchStatus, PartialMatch};
use crate::matcher;
use crate::state::State;
/// A compiled regular expression.
///
/// Built by `Regex::new` / `Regex::new_with_flags`, which run
/// `crate::parser::parse` and store its results here.
pub struct Regex {
// Source text of the pattern, exactly as passed to the constructor.
pattern: String,
// Root of the parsed syntax tree handed to the matcher.
ast: Box<Node>,
// Flags as reported back by the parser (taken from the parse result, not
// directly from the constructor argument).
flags: Flags,
// Number of capture groups reported by the parser. Group 0 (the whole match)
// is addressed separately, so group indices run `0..=n_groups`.
n_groups: usize,
// Maps each named group to its numeric group index.
names: HashMap<String, usize>,
}
impl Regex {
pub fn new(pattern: &str) -> Result<Regex> {
Self::new_with_flags(pattern, Flags::NONE)
}
/// Compiles `pattern` using the given `flags`.
///
/// The stored flags are the ones the parser reports back, not necessarily
/// the `flags` argument itself.
///
/// # Errors
/// Propagates the error returned by `crate::parser::parse`.
pub fn new_with_flags(pattern: &str, flags: Flags) -> Result<Regex> {
    let parsed = crate::parser::parse(pattern, flags)?;
    let ast = Box::new(parsed.node);
    let compiled = Regex {
        pattern: String::from(pattern),
        ast,
        flags: parsed.flags,
        n_groups: parsed.n_groups,
        names: parsed.names,
    };
    Ok(compiled)
}
/// Returns the pattern text this regex was compiled from.
pub fn as_str(&self) -> &str {
    self.pattern.as_str()
}
pub fn flags(&self) -> Flags {
self.flags
}
/// Returns the number of capture groups recorded for the pattern.
pub fn capture_count(&self) -> usize {
    let count = self.n_groups;
    count
}
pub fn group_names(&self) -> &HashMap<String, usize> {
&self.names
}
/// Looks up the numeric index of the group called `name`.
///
/// Returns `None` when the pattern has no group with that name.
pub fn group_index(&self, name: &str) -> Option<usize> {
    let index = *self.names.get(name)?;
    Some(index)
}
/// Returns an owned copy of the name-to-index map, for match objects that
/// carry their own copy.
pub(crate) fn names_clone(&self) -> HashMap<String, usize> {
    HashMap::clone(&self.names)
}
/// Renders the parsed syntax tree into a `String`.
///
/// Whatever `Node::dump` returns is deliberately discarded; the text written
/// into the buffer so far is returned regardless.
pub fn dump(&self) -> String {
    let mut rendered = String::new();
    let _ = self.ast.dump(&mut rendered, 0);
    rendered
}
/// Builds a fresh matcher `State` for `haystack`.
///
/// The haystack is decoded into its `char`s along with a parallel table
/// giving the byte offset at which each char starts. The table gets one
/// extra trailing entry equal to `haystack.len()`, so an end-of-input char
/// position also maps to a byte offset.
fn build_state(&self, haystack: &str) -> State {
    let byte_len = haystack.len();
    let mut chars = Vec::with_capacity(byte_len);
    let mut offsets = Vec::with_capacity(byte_len + 1);
    haystack.char_indices().for_each(|(offset, ch)| {
        chars.push(ch);
        offsets.push(offset);
    });
    // Sentinel entry for the position just past the last char.
    offsets.push(byte_len);
    State::new(chars, offsets, self.n_groups)
}
/// Snapshots the capture data held in `st` into an owned `Match`.
///
/// Every table is cloned, so the returned match stays valid after `st` is
/// reused for another search.
fn match_from_state<'h>(&self, haystack: &'h str, st: &State) -> Match<'h> {
    let char_to_byte = st.char_to_byte.clone();
    let caps = st.caps.clone();
    let log = st.log.clone();
    let names = self.names_clone();
    Match {
        haystack,
        char_to_byte,
        caps,
        log,
        names,
    }
}
/// Searches for the leftmost match starting at char position `from` or later.
///
/// Each candidate start position up to and including `st.len()` is tried in
/// turn, resetting the state before each attempt. On success, group 0 is
/// recorded in both `st.caps` and `st.log`, and the `(start, end)` char span
/// is returned. Returns `None` when no position matches, including when
/// `from` is already past `st.len()`.
pub(crate) fn find_from(&self, st: &mut State, from: usize) -> Option<(usize, usize)> {
    let last_start = st.len();
    for start in from..=last_start {
        st.reset_for_search(start);
        if !matcher::try_match(&self.ast, st) {
            continue;
        }
        let span = (start, st.pos);
        st.caps[0] = Some(span);
        st.log[0].push(span);
        return Some(span);
    }
    None
}
/// Reports whether the pattern matches anywhere in `haystack`.
pub fn is_match(&self, haystack: &str) -> bool {
    let mut st = self.build_state(haystack);
    let hit = self.find_from(&mut st, 0);
    hit.is_some()
}
/// Returns the leftmost match in `haystack`, or `None` if there is none.
pub fn find<'h>(&self, haystack: &'h str) -> Option<Match<'h>> {
    let mut st = self.build_state(haystack);
    self.find_from(&mut st, 0)
        .map(|_| self.match_from_state(haystack, &st))
}
/// Returns the leftmost match that begins at or after byte offset `start`.
///
/// `start` is a byte offset into `haystack`. An offset past the end is
/// clamped to `haystack.len()`. An offset that falls inside a multi-byte
/// character is rounded up to the start of the next character, rather than
/// panicking on an invalid slice boundary.
pub fn find_at<'h>(&self, haystack: &'h str, start: usize) -> Option<Match<'h>> {
    let mut st = self.build_state(haystack);
    let clamp = start.min(haystack.len());
    // Convert the byte offset to a char position by counting the chars that
    // begin before it. Slicing `haystack[..clamp]` would panic when `clamp`
    // is not on a char boundary.
    let start_char = haystack
        .char_indices()
        .take_while(|&(offset, _)| offset < clamp)
        .count();
    self.find_from(&mut st, start_char)?;
    Some(self.match_from_state(haystack, &st))
}
pub fn captures<'h>(&self, haystack: &'h str) -> Option<Match<'h>> {
self.find(haystack)
}
/// Attempts a match anchored at the very start of `haystack`.
///
/// Only position 0 is tried; no later start positions are scanned. On
/// success, group 0 is recorded as `(0, end)`.
pub fn match_at_start<'h>(&self, haystack: &'h str) -> Option<Match<'h>> {
    let mut st = self.build_state(haystack);
    st.reset_for_search(0);
    if !matcher::try_match(&self.ast, &mut st) {
        return None;
    }
    let span = (0, st.pos);
    st.caps[0] = Some(span);
    st.log[0].push(span);
    Some(self.match_from_state(haystack, &st))
}
/// Attempts a match that starts at position 0, via `matcher::try_match_to`
/// with the haystack length as its target.
///
/// NOTE(review): presumably `try_match_to` only succeeds when the match ends
/// exactly at the given position — confirm in `matcher`.
pub fn fullmatch<'h>(&self, haystack: &'h str) -> Option<Match<'h>> {
    let mut st = self.build_state(haystack);
    let target = st.len();
    st.reset_for_search(0);
    if !matcher::try_match_to(&self.ast, &mut st, target) {
        return None;
    }
    let span = (0, st.pos);
    st.caps[0] = Some(span);
    st.log[0].push(span);
    Some(self.match_from_state(haystack, &st))
}
/// Creates an iterator over matches in `haystack`, starting at position 0
/// with no previous match recorded.
pub fn find_iter<'r, 'h>(&'r self, haystack: &'h str) -> FindIter<'r, 'h> {
    let st = self.build_state(haystack);
    FindIter {
        re: self,
        haystack,
        st,
        pos: 0,
        last_end: None,
    }
}
pub fn captures_iter<'r, 'h>(&'r self, haystack: &'h str) -> FindIter<'r, 'h> {
self.find_iter(haystack)
}
/// Searches for a match that reaches the end of `haystack`, either complete
/// or partial.
///
/// For each start position in order:
/// 1. If `matcher::try_match_to` succeeds with the haystack length as its
///    target, a `MatchStatus::Full` result spanning `start..len` is returned.
/// 2. Otherwise, if the matcher left a `partial_best` candidate that ends at
///    the end of the haystack and the span is non-empty, a
///    `MatchStatus::Partial` result is returned.
///
/// Returns `None` when no start position yields either outcome.
pub fn find_partial<'h>(&self, haystack: &'h str) -> Option<PartialMatch<'h>> {
    let mut st = self.build_state(haystack);
    let len = st.len();
    let char_to_byte = st.char_to_byte.clone();
    for start in 0..=len {
        st.reset_for_search(start);
        // Set after every reset, as the original does.
        st.partial_mode = true;

        if matcher::try_match_to(&self.ast, &mut st, len) {
            st.caps[0] = Some((start, len));
            let completed = st.caps.iter().filter(|c| c.is_some()).count();
            let full = crate::state::PartialCandidate {
                end: len,
                completed,
                caps: st.caps.clone(),
                open: Vec::new(),
            };
            let result = self.build_partial_match(
                haystack,
                &char_to_byte,
                MatchStatus::Full,
                start,
                len,
                &full,
            );
            return Some(result);
        }

        // The candidate is always taken out of the state, even when rejected.
        match st.partial_best.take() {
            Some(cand) if cand.end == len && start < len => {
                let result = self.build_partial_match(
                    haystack,
                    &char_to_byte,
                    MatchStatus::Partial,
                    start,
                    cand.end,
                    &cand,
                );
                return Some(result);
            }
            _ => {}
        }
    }
    None
}
/// Assembles a `PartialMatch` from a candidate.
///
/// * `char_to_byte` – table translating char positions to byte offsets.
/// * `status` – stored unchanged in the result.
/// * `start_char` / `end_char` – char span of the overall match.
/// * `cand` – source of the completed captures (`caps`) and of the groups
///   still open when the input ran out (`open`).
///
/// Group 0 is always the overall matched text. For every other group, the
/// most recent entry in `cand.open` wins and yields a `Partial` slice running
/// to the end of the match. Otherwise a completed capture yields `Matched`.
/// Otherwise the group is `None`.
fn build_partial_match<'h>(
    &self,
    haystack: &'h str,
    char_to_byte: &[usize],
    status: MatchStatus,
    start_char: usize,
    end_char: usize,
    cand: &crate::state::PartialCandidate,
) -> PartialMatch<'h> {
    let byte_start = char_to_byte[start_char];
    let byte_end = char_to_byte[end_char];
    let matched = &haystack[byte_start..byte_end];
    let groups = (0..=self.n_groups)
        .map(|g| {
            if g == 0 {
                GroupMatch::Matched(matched)
            } else if let Some(&(_, opened_at)) =
                cand.open.iter().rev().find(|(idx, _)| *idx == g)
            {
                GroupMatch::Partial(&haystack[char_to_byte[opened_at]..byte_end])
            } else if let Some(Some((s, e))) = cand.caps.get(g).copied() {
                GroupMatch::Matched(&haystack[char_to_byte[s]..char_to_byte[e]])
            } else {
                GroupMatch::None
            }
        })
        .collect();
    let names = self.names_clone();
    PartialMatch {
        status,
        matched,
        start: byte_start,
        end: byte_end,
        groups,
        names,
    }
}
/// Replaces the first match in `haystack` with the expansion of `repl`.
///
/// `repl` is a replacement template processed by `expand`. When nothing
/// matches, an unchanged copy of `haystack` is returned.
pub fn replace(&self, haystack: &str, repl: &str) -> String {
    let mut st = self.build_state(haystack);
    let (s, e) = match self.find_from(&mut st, 0) {
        Some(span) => span,
        None => return haystack.to_string(),
    };
    let m = self.match_from_state(haystack, &st);
    let (byte_start, byte_end) = (st.char_to_byte[s], st.char_to_byte[e]);

    let mut out = String::with_capacity(haystack.len());
    out.push_str(&haystack[..byte_start]);
    out.push_str(&expand(repl, &m));
    out.push_str(&haystack[byte_end..]);
    out
}
/// Replaces every non-overlapping match in `haystack` with the expansion of
/// `repl`.
///
/// After an empty match, the character at that position (if any) is copied
/// through verbatim and the search resumes one character later, so the scan
/// always makes progress.
pub fn replace_all(&self, haystack: &str, repl: &str) -> String {
    let mut out = String::with_capacity(haystack.len());
    let mut st = self.build_state(haystack);
    let mut cursor = 0usize;

    while let Some((s, e)) = self.find_from(&mut st, cursor) {
        let match_start = st.char_to_byte[s];
        let m = self.match_from_state(haystack, &st);
        // Unmatched text between the previous match and this one.
        out.push_str(&haystack[st.char_to_byte[cursor]..match_start]);
        out.push_str(&expand(repl, &m));

        if s != e {
            cursor = e;
        } else {
            if e < st.len() {
                let from = st.char_to_byte[e];
                let to = st.char_to_byte[e + 1];
                out.push_str(&haystack[from..to]);
            }
            cursor = e + 1;
        }

        // An empty match at the very end pushes the cursor past the input;
        // nothing is left to copy or search.
        if cursor > st.len() {
            return out;
        }
    }

    // No further match: copy the remaining tail.
    out.push_str(&haystack[st.char_to_byte[cursor]..]);
    out
}
/// Splits `haystack` and collects every item of `split_iter` into a `Vec`.
pub fn split(&self, haystack: &str) -> Vec<String> {
    let pieces = self.split_iter(haystack);
    pieces.collect()
}
/// Creates a lazy splitter over `haystack`, starting at position 0 with
/// nothing queued.
pub fn split_iter<'r, 'h>(&'r self, haystack: &'h str) -> SplitIter<'r, 'h> {
    let st = self.build_state(haystack);
    SplitIter {
        re: self,
        haystack,
        st,
        cursor: 0,
        pending: Vec::new(),
        done: false,
    }
}
}
/// Debug output shows the pattern text, the raw flag bits and the group
/// count; the syntax tree and name table are omitted.
impl std::fmt::Debug for Regex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut dbg = f.debug_struct("Regex");
        dbg.field("pattern", &self.pattern);
        dbg.field("flags", &self.flags.bits());
        dbg.field("groups", &self.n_groups);
        dbg.finish()
    }
}
/// Expands the replacement template `repl` against the match `m`.
///
/// Recognised syntax:
/// * `$$` – a literal `$`
/// * `$&` – the whole match
/// * `$N`, `${N}`, `${name}` – a capture group
/// * `\\`, `\n`, `\t`, `\r` – backslash, newline, tab, carriage return
/// * `\N`, `\g<name>`, `\g'name'` – a capture group
///
/// Any other character following `$` or `\` is emitted unchanged together
/// with its prefix, and so is a trailing lone `$` or `\`. A `\g` that is not
/// followed by `<` or `'` is emitted literally as `\g`. An unterminated
/// `${...` or `\g<...` consumes the rest of the template as the group name.
/// Group lookup is delegated to `append_group`.
fn expand(repl: &str, m: &Match<'_>) -> String {
    /// Collects chars from `*i` while `keep` holds, advancing `*i` past them.
    fn take_run(chars: &[char], i: &mut usize, keep: impl Fn(char) -> bool) -> String {
        let mut run = String::new();
        while *i < chars.len() && keep(chars[*i]) {
            run.push(chars[*i]);
            *i += 1;
        }
        run
    }

    /// Reads a name up to `close`, stepping over the delimiter when present.
    fn take_delimited(chars: &[char], i: &mut usize, close: char) -> String {
        let name = take_run(chars, i, |c| c != close);
        if *i < chars.len() {
            *i += 1;
        }
        name
    }

    let chars: Vec<char> = repl.chars().collect();
    let mut out = String::new();
    let mut i = 0;
    while i < chars.len() {
        let c = chars[i];
        match c {
            '$' => {
                i += 1;
                if i >= chars.len() {
                    out.push('$');
                    break;
                }
                match chars[i] {
                    '$' => {
                        out.push('$');
                        i += 1;
                    }
                    '&' => {
                        out.push_str(m.as_str());
                        i += 1;
                    }
                    '{' => {
                        i += 1;
                        let name = take_delimited(&chars, &mut i, '}');
                        append_group(&mut out, m, &name);
                    }
                    d if d.is_ascii_digit() => {
                        let num = take_run(&chars, &mut i, |ch| ch.is_ascii_digit());
                        append_group(&mut out, m, &num);
                    }
                    other => {
                        out.push('$');
                        out.push(other);
                        i += 1;
                    }
                }
            }
            '\\' => {
                i += 1;
                if i >= chars.len() {
                    out.push('\\');
                    break;
                }
                match chars[i] {
                    '\\' => {
                        out.push('\\');
                        i += 1;
                    }
                    'n' => {
                        out.push('\n');
                        i += 1;
                    }
                    't' => {
                        out.push('\t');
                        i += 1;
                    }
                    'r' => {
                        out.push('\r');
                        i += 1;
                    }
                    'g' => {
                        i += 1;
                        if i < chars.len() && (chars[i] == '<' || chars[i] == '\'') {
                            let close = if chars[i] == '<' { '>' } else { '\'' };
                            i += 1;
                            let name = take_delimited(&chars, &mut i, close);
                            append_group(&mut out, m, &name);
                        } else {
                            // Not a group reference: keep `\g` literally, as
                            // for any other unknown escape. The following
                            // char, if any, is handled by the next iteration.
                            out.push_str("\\g");
                        }
                    }
                    d if d.is_ascii_digit() => {
                        let num = take_run(&chars, &mut i, |ch| ch.is_ascii_digit());
                        append_group(&mut out, m, &num);
                    }
                    other => {
                        out.push('\\');
                        out.push(other);
                        i += 1;
                    }
                }
            }
            _ => {
                out.push(c);
                i += 1;
            }
        }
    }
    out
}
/// Appends the text of the group referred to by `name` to `out`.
///
/// A reference made up only of ASCII digits that parses as `usize` is treated
/// as a numeric group index. Everything else, including an empty reference or
/// a digit string too large for `usize`, is looked up as a group name.
/// Groups that are missing or did not participate append nothing.
fn append_group(out: &mut String, m: &Match<'_>, name: &str) {
    // The digit check comes first because `parse::<usize>` alone would also
    // accept a leading `+`.
    let index = if name.chars().all(|c| c.is_ascii_digit()) {
        name.parse::<usize>().ok()
    } else {
        None
    };
    match index {
        Some(n) => {
            if let Some(text) = m.group(n) {
                out.push_str(text);
            }
        }
        None => {
            if let Some(text) = m.name(name) {
                out.push_str(text);
            }
        }
    }
}
/// Iterator created by `Regex::split_iter`; yields the pieces of a haystack
/// as owned `String`s.
pub struct SplitIter<'r, 'h> {
// Regex used to locate each separator.
re: &'r Regex,
// Text being split.
haystack: &'h str,
// Matcher state built once from `haystack` and reused for every search.
st: State,
// `cursor`: char position where the next search starts and where the next
// piece begins.
// `pending`: strings queued for upcoming `next` calls; `next` pops them from
// the back before searching again.
cursor: usize, pending: Vec<String>,
// Set once the final piece has been handed out.
done: bool,
}
impl<'r, 'h> Iterator for SplitIter<'r, 'h> {
    type Item = String;

    /// Yields the next piece of the split.
    ///
    /// For every separator match the iterator yields, in order: the text
    /// between the previous separator and this one, the text of each capture
    /// group `1..=n_groups` (an empty string for groups that did not
    /// participate), and — after an empty match that is not at the end of the
    /// haystack — the single character stepped over to guarantee progress.
    /// Once no further separator is found, the remaining tail is yielded and
    /// the iterator ends.
    ///
    /// NOTE(review): yielding the stepped-over character as its own piece
    /// differs from the usual split convention, where it would belong to the
    /// following piece. Left unchanged here — confirm whether it is intended.
    fn next(&mut self) -> Option<String> {
        if let Some(queued) = self.pending.pop() {
            return Some(queued);
        }
        if self.done {
            return None;
        }

        // An empty match at the very end of the haystack leaves `cursor` one
        // past the last valid position. Searching again cannot succeed, and
        // indexing `char_to_byte[cursor]` would be out of bounds. Finish with
        // the (empty) trailing piece instead.
        if self.cursor > self.st.len() {
            self.done = true;
            return Some(String::new());
        }

        match self.re.find_from(&mut self.st, self.cursor) {
            Some((s, e)) => {
                let bs = self.st.char_to_byte[self.cursor];
                let be = self.st.char_to_byte[s];
                let piece = self.haystack[bs..be].to_string();

                // `pending` is empty here and is drained from the back, so
                // push in reverse of the yield order: the skipped character
                // first (yielded last), then the groups from last to first.
                let empty_match = e == s;
                if empty_match && e < self.st.len() {
                    let skip = self.haystack
                        [self.st.char_to_byte[e]..self.st.char_to_byte[e + 1]]
                        .to_string();
                    self.pending.push(skip);
                }
                if self.re.n_groups > 0 {
                    // Snapshot the match once rather than once per group.
                    let m = self.re.match_from_state(self.haystack, &self.st);
                    for g in (1..=self.re.n_groups).rev() {
                        let grp = m.group(g).map(str::to_string).unwrap_or_default();
                        self.pending.push(grp);
                    }
                }

                self.cursor = if empty_match { e + 1 } else { e };
                Some(piece)
            }
            None => {
                let bs = self.st.char_to_byte[self.cursor];
                let piece = self.haystack[bs..].to_string();
                self.done = true;
                Some(piece)
            }
        }
    }
}
pub use crate::flags as flags_module;
#[allow(unused_imports)]
use flags_module as _flags;