#[cfg(feature = "std")]
use dense::{self, DenseDFA};
use dfa::DFA;
#[cfg(feature = "std")]
use error::Result;
#[cfg(feature = "std")]
use sparse::SparseDFA;
#[cfg(feature = "std")]
use state_id::StateID;
/// A regex made of two DFAs of the same type `D`.
///
/// The `forward` DFA is what `is_match_at`, `shortest_match_at` and `find_at`
/// run first; `find_at` additionally runs the `reverse` DFA to recover the
/// start offset of a match.
///
/// In this (`std`) configuration `D` defaults to
/// `DenseDFA<Vec<usize>, usize>`.
#[cfg(feature = "std")]
#[derive(Clone, Debug)]
pub struct Regex<D: DFA = DenseDFA<Vec<usize>, usize>> {
/// DFA searched from `start` towards the end of the input.
forward: D,
/// DFA handed to `rfind` by `find_at` to locate where a match begins.
reverse: D,
}
/// A regex made of two DFAs of the same type `D`.
///
/// This is the definition used when the `std` feature is disabled; unlike
/// the `std` variant it declares no default for `D` and no bound on it.
#[cfg(not(feature = "std"))]
#[derive(Clone, Debug)]
pub struct Regex<D> {
/// DFA searched from `start` towards the end of the input.
forward: D,
/// DFA handed to `rfind` by `find_at` to locate where a match begins.
reverse: D,
}
#[cfg(feature = "std")]
impl Regex {
    /// Compiles `pattern` into a regex using a freshly created
    /// [`RegexBuilder`] with none of its options changed.
    ///
    /// # Errors
    ///
    /// Returns whatever error `RegexBuilder::build` reports for `pattern`.
    pub fn new(pattern: &str) -> Result<Regex> {
        let builder = RegexBuilder::new();
        builder.build(pattern)
    }
}
#[cfg(feature = "std")]
impl Regex<SparseDFA<Vec<u8>, usize>> {
    /// Compiles `pattern` into a regex backed by sparse DFAs, using a
    /// freshly created [`RegexBuilder`] with none of its options changed.
    ///
    /// # Errors
    ///
    /// Returns whatever error `RegexBuilder::build_sparse` reports for
    /// `pattern`.
    pub fn new_sparse(
        pattern: &str,
    ) -> Result<Regex<SparseDFA<Vec<u8>, usize>>> {
        let builder = RegexBuilder::new();
        builder.build_sparse(pattern)
    }
}
impl<D: DFA> Regex<D> {
    /// Reports whether the forward DFA matches `input`, searching from
    /// offset `0`.
    pub fn is_match(&self, input: &[u8]) -> bool {
        self.is_match_at(input, 0)
    }

    /// Runs the forward DFA's shortest-match search over `input`, starting
    /// at offset `0`, and returns its result unchanged.
    pub fn shortest_match(&self, input: &[u8]) -> Option<usize> {
        self.shortest_match_at(input, 0)
    }

    /// Searches `input` from offset `0` and returns the `(start, end)`
    /// offsets of a match, or `None` if the forward DFA finds nothing.
    ///
    /// See [`Regex::find_at`] for how the two offsets are computed.
    pub fn find(&self, input: &[u8]) -> Option<(usize, usize)> {
        self.find_at(input, 0)
    }

    /// Like [`Regex::is_match`], but the forward DFA begins its search at
    /// offset `start` of `input`.
    pub fn is_match_at(&self, input: &[u8], start: usize) -> bool {
        let fwd = self.forward();
        fwd.is_match_at(input, start)
    }

    /// Like [`Regex::shortest_match`], but the forward DFA begins its
    /// search at offset `start` of `input`.
    pub fn shortest_match_at(
        &self,
        input: &[u8],
        start: usize,
    ) -> Option<usize> {
        let fwd = self.forward();
        fwd.shortest_match_at(input, start)
    }

    /// Finds a match in `input`, beginning the search at offset `start`.
    ///
    /// The forward DFA is run first to obtain the end offset. The reverse
    /// DFA is then run with `rfind` over `input[start..end]`; the offset it
    /// reports is relative to that slice, so `start` is added back to make
    /// it an offset into `input`.
    ///
    /// # Panics
    ///
    /// Panics if the reverse DFA reports no match after the forward DFA
    /// reported one, or if `start..end` is not a valid range of `input`.
    pub fn find_at(
        &self,
        input: &[u8],
        start: usize,
    ) -> Option<(usize, usize)> {
        let match_end = self.forward().find_at(input, start)?;
        let haystack = &input[start..match_end];
        let relative_start = self
            .reverse()
            .rfind(haystack)
            .expect("reverse search must match if forward search does");
        Some((start + relative_start, match_end))
    }

    /// Returns an iterator that yields successive `(start, end)` matches
    /// found in `input`.
    pub fn find_iter<'r, 't>(
        &'r self,
        input: &'t [u8],
    ) -> Matches<'r, 't, D> {
        Matches::new(self, input)
    }

    /// Assembles a regex directly from an already-built `forward` and
    /// `reverse` DFA. No validation is performed here.
    pub fn from_dfas(forward: D, reverse: D) -> Regex<D> {
        Regex { forward, reverse }
    }

    /// Borrows the forward DFA.
    pub fn forward(&self) -> &D {
        &self.forward
    }

    /// Borrows the reverse DFA.
    pub fn reverse(&self) -> &D {
        &self.reverse
    }
}
/// Iterator over the `(start, end)` matches of a [`Regex`] in a byte slice.
///
/// `'r` is the lifetime of the borrowed regex and `'t` the lifetime of the
/// text being searched.
#[derive(Clone, Debug)]
pub struct Matches<'r, 't, D: DFA + 'r> {
/// Regex whose `find_at` drives the iteration.
re: &'r Regex<D>,
/// Full text being searched; every search is given the whole slice.
text: &'t [u8],
/// Offset at which the next call to `find_at` starts.
last_end: usize,
/// End offset of the most recently yielded match, if any.
last_match: Option<usize>,
}
impl<'r, 't, D: DFA> Matches<'r, 't, D> {
    /// Creates an iterator over `text` that starts searching at offset `0`
    /// and has not yet yielded any match.
    fn new(re: &'r Regex<D>, text: &'t [u8]) -> Matches<'r, 't, D> {
        Matches { re, text, last_end: 0, last_match: None }
    }
}
impl<'r, 't, D: DFA> Iterator for Matches<'r, 't, D> {
    type Item = (usize, usize);

    /// Yields the next `(start, end)` match, or `None` once the search
    /// position has moved past the end of the text or no match remains.
    fn next(&mut self) -> Option<(usize, usize)> {
        loop {
            // The search position may legitimately equal `text.len()`;
            // only a position beyond it ends the iteration.
            if self.last_end > self.text.len() {
                return None;
            }
            let (begin, finish) =
                self.re.find_at(self.text, self.last_end)?;
            if begin != finish {
                self.last_end = finish;
            } else {
                // Empty match: step one byte forward so the next search
                // cannot report the same position again.
                self.last_end = finish + 1;
                // An empty match ending exactly where the previous match
                // ended is skipped; search again from the new position.
                if self.last_match == Some(finish) {
                    continue;
                }
            }
            self.last_match = Some(finish);
            return Some((begin, finish));
        }
    }
}
/// Builder for [`Regex`] values (available with the `std` feature).
///
/// It wraps a single `dense::Builder`; every option setter on this type
/// forwards to the setter of the same name on that builder.
#[cfg(feature = "std")]
#[derive(Clone, Debug)]
pub struct RegexBuilder {
/// Dense DFA builder holding the configuration. The forward DFA is built
/// from it as-is, the reverse DFA from a modified clone.
dfa: dense::Builder,
}
#[cfg(feature = "std")]
impl RegexBuilder {
    /// Creates a builder wrapping a new `dense::Builder` with none of its
    /// options changed.
    pub fn new() -> RegexBuilder {
        let dfa = dense::Builder::new();
        RegexBuilder { dfa }
    }

    /// Builds a dense-DFA regex for `pattern` using `usize` state
    /// identifiers.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`RegexBuilder::build_with_size`].
    pub fn build(
        &self,
        pattern: &str,
    ) -> Result<Regex> {
        self.build_with_size::<usize>(pattern)
    }

    /// Builds a sparse-DFA regex for `pattern` using `usize` state
    /// identifiers.
    ///
    /// # Errors
    ///
    /// Propagates any error from
    /// [`RegexBuilder::build_with_size_sparse`].
    pub fn build_sparse(
        &self,
        pattern: &str,
    ) -> Result<Regex<SparseDFA<Vec<u8>, usize>>> {
        self.build_with_size_sparse::<usize>(pattern)
    }

    /// Builds a dense-DFA regex for `pattern` with state identifiers of
    /// type `S`.
    ///
    /// The forward DFA is built from this builder's configuration as-is.
    /// The reverse DFA is built from a clone of that configuration with
    /// `anchored`, `reverse` and `longest_match` all forced to `true`.
    ///
    /// # Errors
    ///
    /// Returns the error from whichever of the two DFA builds fails first;
    /// the forward DFA is built first.
    pub fn build_with_size<S: StateID>(
        &self,
        pattern: &str,
    ) -> Result<Regex<DenseDFA<Vec<S>, S>>> {
        let forward = self.dfa.build_with_size(pattern)?;

        // Work on a clone so the overrides needed for the reverse DFA never
        // leak into this builder's own configuration.
        let mut reverse_builder = self.dfa.clone();
        reverse_builder.anchored(true).reverse(true).longest_match(true);
        let reverse = reverse_builder.build_with_size(pattern)?;

        Ok(Regex::from_dfas(forward, reverse))
    }

    /// Builds a sparse-DFA regex for `pattern` with state identifiers of
    /// type `S`.
    ///
    /// A dense regex is built first via
    /// [`RegexBuilder::build_with_size`]; its forward and reverse DFAs are
    /// then each converted with `to_sparse`.
    ///
    /// # Errors
    ///
    /// Propagates an error from the dense build or from either conversion.
    pub fn build_with_size_sparse<S: StateID>(
        &self,
        pattern: &str,
    ) -> Result<Regex<SparseDFA<Vec<u8>, S>>> {
        let dense_re = self.build_with_size::<S>(pattern)?;
        let forward = dense_re.forward().to_sparse()?;
        let reverse = dense_re.reverse().to_sparse()?;
        Ok(Regex::from_dfas(forward, reverse))
    }

    /// Forwards `yes` to the `anchored` option of the wrapped dense DFA
    /// builder and returns `self` for chaining.
    ///
    /// Only the forward DFA is affected: the reverse DFA is always built
    /// with `anchored(true)`.
    pub fn anchored(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.anchored(yes);
        self
    }

    /// Forwards `yes` to the `case_insensitive` option of the wrapped dense
    /// DFA builder and returns `self` for chaining.
    pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.case_insensitive(yes);
        self
    }

    /// Forwards `yes` to the `ignore_whitespace` option of the wrapped
    /// dense DFA builder and returns `self` for chaining.
    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.ignore_whitespace(yes);
        self
    }

    /// Forwards `yes` to the `dot_matches_new_line` option of the wrapped
    /// dense DFA builder and returns `self` for chaining.
    pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.dot_matches_new_line(yes);
        self
    }

    /// Forwards `yes` to the `swap_greed` option of the wrapped dense DFA
    /// builder and returns `self` for chaining.
    pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.swap_greed(yes);
        self
    }

    /// Forwards `yes` to the `unicode` option of the wrapped dense DFA
    /// builder and returns `self` for chaining.
    pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.unicode(yes);
        self
    }

    /// Forwards `yes` to the `allow_invalid_utf8` option of the wrapped
    /// dense DFA builder and returns `self` for chaining.
    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.allow_invalid_utf8(yes);
        self
    }

    /// Forwards `limit` to the `nest_limit` option of the wrapped dense DFA
    /// builder and returns `self` for chaining.
    pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
        self.dfa.nest_limit(limit);
        self
    }

    /// Forwards `yes` to the `minimize` option of the wrapped dense DFA
    /// builder and returns `self` for chaining.
    pub fn minimize(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.minimize(yes);
        self
    }

    /// Forwards `yes` to the `premultiply` option of the wrapped dense DFA
    /// builder and returns `self` for chaining.
    pub fn premultiply(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.premultiply(yes);
        self
    }

    /// Forwards `yes` to the `byte_classes` option of the wrapped dense DFA
    /// builder and returns `self` for chaining.
    pub fn byte_classes(&mut self, yes: bool) -> &mut RegexBuilder {
        self.dfa.byte_classes(yes);
        self
    }
}
#[cfg(feature = "std")]
impl Default for RegexBuilder {
    /// Equivalent to [`RegexBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}