use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt;
use std::ops::Index;
use std::str::FromStr;
use std::sync::Arc;
use memchr::memchr;
use exec::{Exec, ExecNoSync};
use expand::expand_bytes;
use error::Error;
use re_builder::bytes::RegexBuilder;
use re_trait::{self, RegularExpression, SubCapturesPosIter};
/// A single match of a regex inside a byte-slice haystack.
///
/// The whole haystack is kept alongside the `start..end` byte range, so the
/// matched bytes can be recovered later with `as_bytes`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Match<'t> {
    /// The haystack that `start` and `end` index into.
    text: &'t [u8],
    /// Byte offset at which the match begins.
    start: usize,
    /// Byte offset one past the last matched byte.
    end: usize,
}

impl<'t> Match<'t> {
    /// Returns the byte offset in the haystack where the match starts.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the byte offset in the haystack just past the match.
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }

    /// Returns the matched bytes, i.e. `haystack[start..end]`.
    #[inline]
    pub fn as_bytes(&self) -> &'t [u8] {
        // `Match` is `Copy`, so destructuring a copy is free.
        let Match { text, start, end } = *self;
        &text[start..end]
    }

    /// Builds a match covering `haystack[start..end]`.
    ///
    /// The offsets are stored as given; they are only bounds-checked when
    /// `as_bytes` slices the haystack.
    #[inline]
    fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> {
        Match { text: haystack, start, end }
    }
}
/// A compiled regular expression that searches byte slices (`&[u8]`).
///
/// This is a newtype around `Exec`; cloning a `Regex` clones the wrapped
/// `Exec`.
#[derive(Clone)]
pub struct Regex(Exec);
impl fmt::Display for Regex {
    /// Writes the pattern string returned by `as_str`, unpadded.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.as_str())
    }
}
impl fmt::Debug for Regex {
    /// Prints exactly what the `Display` implementation prints.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self)
    }
}
// Wraps an already-built `Exec` in a `Regex`; hidden from rendered docs.
#[doc(hidden)]
impl From<Exec> for Regex {
    fn from(exec: Exec) -> Self {
        Regex(exec)
    }
}
impl FromStr for Regex {
    type Err = Error;

    /// Compiles `s` as a pattern; equivalent to calling `Regex::new(s)`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Regex::new(s)
    }
}
impl Regex {
    /// Compiles `re` using a `RegexBuilder` with no extra configuration.
    ///
    /// Returns the builder's error if the pattern cannot be built.
    pub fn new(re: &str) -> Result<Regex, Error> {
        RegexBuilder::new(re).build()
    }

    /// Reports whether the regex matches `text`, searching from offset 0.
    pub fn is_match(&self, text: &[u8]) -> bool {
        self.is_match_at(text, 0)
    }

    /// Returns the match found when searching `text` from offset 0, or
    /// `None` if the search finds nothing.
    pub fn find<'t>(&self, text: &'t [u8]) -> Option<Match<'t>> {
        self.find_at(text, 0)
    }

    /// Returns an iterator over the matches the searcher reports in `text`.
    pub fn find_iter<'r, 't>(&'r self, text: &'t [u8]) -> Matches<'r, 't> {
        let inner = self.0.searcher().find_iter(text);
        Matches(inner)
    }

    /// Runs a capturing search over `text` from offset 0.
    ///
    /// On success the recorded group locations are bundled with `text` and
    /// the regex's name-to-index table; otherwise `None` is returned.
    pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> {
        let mut locs = self.capture_locations();
        if self.captures_read_at(&mut locs, text, 0).is_none() {
            return None;
        }
        Some(Captures {
            text,
            locs: locs.0,
            named_groups: self.0.capture_name_idx().clone(),
        })
    }

    /// Returns an iterator yielding a `Captures` for each match the
    /// searcher reports in `text`.
    pub fn captures_iter<'r, 't>(
        &'r self,
        text: &'t [u8],
    ) -> CaptureMatches<'r, 't> {
        let inner = self.0.searcher().captures_iter(text);
        CaptureMatches(inner)
    }

    /// Returns an iterator over the pieces of `text` lying between matches.
    pub fn split<'r, 't>(&'r self, text: &'t [u8]) -> Split<'r, 't> {
        let finder = self.find_iter(text);
        Split { finder, last: 0 }
    }

    /// Like `split`, but yields at most `limit` pieces; the final piece is
    /// the unsplit remainder of `text`.
    pub fn splitn<'r, 't>(
        &'r self,
        text: &'t [u8],
        limit: usize,
    ) -> SplitN<'r, 't> {
        let splits = self.split(text);
        SplitN { splits, n: limit }
    }

    /// Replaces only the first match in `text` (`replacen` with limit 1).
    pub fn replace<'t, R: Replacer>(
        &self,
        text: &'t [u8],
        rep: R,
    ) -> Cow<'t, [u8]> {
        self.replacen(text, 1, rep)
    }

    /// Replaces every match in `text` (`replacen` with limit 0).
    pub fn replace_all<'t, R: Replacer>(
        &self,
        text: &'t [u8],
        rep: R,
    ) -> Cow<'t, [u8]> {
        self.replacen(text, 0, rep)
    }

    /// Replaces up to `limit` matches in `text` using `rep`.
    ///
    /// A `limit` of 0 means "no limit". When nothing matches, `text` is
    /// returned borrowed and no allocation takes place; otherwise a new
    /// buffer is built and returned as `Cow::Owned`.
    pub fn replacen<'t, R: Replacer>(
        &self,
        text: &'t [u8],
        limit: usize,
        mut rep: R,
    ) -> Cow<'t, [u8]> {
        // Fast path: the replacer has a fixed byte string, so capture
        // groups are not needed and the plain match iterator suffices.
        if let Some(literal) = rep.no_expansion() {
            let mut found = self.find_iter(text).enumerate().peekable();
            if found.peek().is_none() {
                return Cow::Borrowed(text);
            }
            let mut out = Vec::with_capacity(text.len());
            // Offset up to which `text` has already been copied to `out`.
            let mut copied_upto = 0;
            for (seen, m) in found {
                if limit != 0 && seen >= limit {
                    break;
                }
                out.extend_from_slice(&text[copied_upto..m.start()]);
                out.extend_from_slice(&literal);
                copied_upto = m.end();
            }
            out.extend_from_slice(&text[copied_upto..]);
            return Cow::Owned(out);
        }

        // General path: hand each match's captures to the replacer.
        let mut found = self.captures_iter(text).enumerate().peekable();
        if found.peek().is_none() {
            return Cow::Borrowed(text);
        }
        let mut out = Vec::with_capacity(text.len());
        let mut copied_upto = 0;
        for (seen, caps) in found {
            if limit != 0 && seen >= limit {
                break;
            }
            // Group 0 is expected to be present for every yielded match;
            // this panics otherwise.
            let whole = caps.get(0).unwrap();
            out.extend_from_slice(&text[copied_upto..whole.start()]);
            rep.replace_append(&caps, &mut out);
            copied_upto = whole.end();
        }
        out.extend_from_slice(&text[copied_upto..]);
        Cow::Owned(out)
    }
}
impl Regex {
    /// Calls `shortest_match_at` with a start offset of 0.
    pub fn shortest_match(&self, text: &[u8]) -> Option<usize> {
        self.shortest_match_at(text, 0)
    }

    /// Forwards to the searcher's `shortest_match_at` for `text`, beginning
    /// at byte offset `start`.
    ///
    /// NOTE(review): the returned offset is presumably where the shortest
    /// match ends -- confirm against the searcher implementation.
    pub fn shortest_match_at(
        &self,
        text: &[u8],
        start: usize,
    ) -> Option<usize> {
        let searcher = self.0.searcher();
        searcher.shortest_match_at(text, start)
    }

    /// Reports whether a search of `text` beginning at `start` finds a
    /// match.
    pub fn is_match_at(&self, text: &[u8], start: usize) -> bool {
        self.shortest_match_at(text, start).is_some()
    }

    /// Searches `text` beginning at byte offset `start` and returns the
    /// match found, if any. Offsets in the result index into `text`.
    pub fn find_at<'t>(
        &self,
        text: &'t [u8],
        start: usize,
    ) -> Option<Match<'t>> {
        let span = self.0.searcher().find_at(text, start);
        span.map(|(begin, end)| Match::new(text, begin, end))
    }

    /// Calls `captures_read_at` with a start offset of 0.
    pub fn captures_read<'t>(
        &self,
        locs: &mut CaptureLocations,
        text: &'t [u8],
    ) -> Option<Match<'t>> {
        self.captures_read_at(locs, text, 0)
    }

    /// Runs a capturing search of `text` beginning at `start`, writing
    /// group positions into the caller-supplied `locs`.
    ///
    /// Returns the overall match on success, or `None` otherwise.
    pub fn captures_read_at<'t>(
        &self,
        locs: &mut CaptureLocations,
        text: &'t [u8],
        start: usize,
    ) -> Option<Match<'t>> {
        let span = self
            .0
            .searcher()
            .captures_read_at(&mut locs.0, text, start);
        span.map(|(begin, end)| Match::new(text, begin, end))
    }

    // Hidden alias that forwards to `captures_read_at`.
    #[doc(hidden)]
    pub fn read_captures_at<'t>(
        &self,
        locs: &mut CaptureLocations,
        text: &'t [u8],
        start: usize,
    ) -> Option<Match<'t>> {
        self.captures_read_at(locs, text, start)
    }
}
impl Regex {
    /// Returns the first pattern string stored in the underlying `Exec`.
    pub fn as_str(&self) -> &str {
        self.0.regex_strings()[0].as_str()
    }

    /// Returns an iterator over the capture-group name slots; unnamed
    /// groups are yielded as `None`.
    pub fn capture_names(&self) -> CaptureNames {
        let names = self.0.capture_names();
        CaptureNames(names.iter())
    }

    /// Returns the number of capture-group name slots, named or not.
    pub fn captures_len(&self) -> usize {
        let names = self.0.capture_names();
        names.len()
    }

    /// Creates fresh capture-location storage for use with
    /// `captures_read` and `captures_read_at`.
    pub fn capture_locations(&self) -> CaptureLocations {
        let slots = self.0.searcher().locations();
        CaptureLocations(slots)
    }

    // Hidden alias for `capture_locations`.
    #[doc(hidden)]
    pub fn locations(&self) -> CaptureLocations {
        self.capture_locations()
    }
}
/// Iterator over the matches of a regex in a haystack.
///
/// `'r` is the lifetime of the regex and `'t` that of the searched text.
pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>);

impl<'r, 't> Iterator for Matches<'r, 't> {
    type Item = Match<'t>;

    /// Converts the next `(start, end)` pair from the inner iterator into
    /// a `Match` over the haystack.
    fn next(&mut self) -> Option<Match<'t>> {
        let haystack = self.0.text();
        let (start, end) = match self.0.next() {
            Some(span) => span,
            None => return None,
        };
        Some(Match::new(haystack, start, end))
    }
}
/// Iterator yielding a `Captures` for each match of a regex in a haystack.
///
/// `'r` is the lifetime of the regex and `'t` that of the searched text.
pub struct CaptureMatches<'r, 't>(
    re_trait::CaptureMatches<'t, ExecNoSync<'r>>,
);

impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
    type Item = Captures<'t>;

    /// Wraps the next set of group locations, together with the haystack
    /// and the regex's name-to-index table, in a `Captures`.
    fn next(&mut self) -> Option<Captures<'t>> {
        let locs = match self.0.next() {
            Some(locs) => locs,
            None => return None,
        };
        Some(Captures {
            text: self.0.text(),
            locs,
            named_groups: self.0.regex().capture_name_idx().clone(),
        })
    }
}
/// Iterator over the pieces of a haystack that lie between regex matches.
///
/// `last` is the offset at which the next piece begins. Once the final
/// piece has been handed out it is set to `text.len() + 1`, a value no real
/// offset can take, which marks the iterator as finished.
pub struct Split<'r, 't> {
    finder: Matches<'r, 't>,
    last: usize,
}

impl<'r, 't> Iterator for Split<'r, 't> {
    type Item = &'t [u8];

    /// Yields the text between the previous match and the next one.
    ///
    /// After the last match the remaining tail is yielded exactly once,
    /// even when it is empty. A haystack ending in a delimiter therefore
    /// produces a trailing empty piece, as `str::split` does.
    fn next(&mut self) -> Option<&'t [u8]> {
        let text = self.finder.0.text();
        match self.finder.next() {
            None => {
                if self.last > text.len() {
                    // The tail was already yielded on an earlier call.
                    None
                } else {
                    let tail = &text[self.last..];
                    // Move past the end so the tail is produced only once.
                    self.last = text.len() + 1;
                    Some(tail)
                }
            }
            Some(m) => {
                let piece = &text[self.last..m.start()];
                self.last = m.end();
                Some(piece)
            }
        }
    }
}

/// Iterator over at most `n` pieces of a haystack split by a regex.
///
/// The final piece is the unsplit remainder of the haystack.
pub struct SplitN<'r, 't> {
    splits: Split<'r, 't>,
    n: usize,
}

impl<'r, 't> Iterator for SplitN<'r, 't> {
    type Item = &'t [u8];

    /// Yields the next piece while budget remains; the last permitted
    /// piece is whatever part of the haystack has not been consumed yet.
    fn next(&mut self) -> Option<&'t [u8]> {
        if self.n == 0 {
            return None;
        }
        self.n -= 1;
        if self.n > 0 {
            return self.splits.next();
        }
        // Budget exhausted: emit the remainder, unless the inner splitter
        // has already yielded its final piece.
        let text = self.splits.finder.0.text();
        if self.splits.last > text.len() {
            None
        } else {
            Some(&text[self.splits.last..])
        }
    }
}
/// Iterator over a regex's capture-group name slots.
///
/// Each item is `Some(name)` for a named group and `None` for a slot that
/// has no name.
pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);

impl<'r> Iterator for CaptureNames<'r> {
    type Item = Option<&'r str>;

    /// Yields the next slot, borrowing the name as `&str` when present.
    fn next(&mut self) -> Option<Option<&'r str>> {
        self.0
            .next()
            .map(|slot| slot.as_ref().map(|name| name.as_str()))
    }

    /// Forwards the exact bounds of the underlying slice iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
/// Reusable storage for capture-group offsets, wrapping
/// `re_trait::Locations`.
///
/// Values are created by `Regex::capture_locations` and filled in by
/// `Regex::captures_read` / `Regex::captures_read_at`.
#[derive(Clone, Debug)]
pub struct CaptureLocations(re_trait::Locations);
// Hidden alias exposing the same type under the name `Locations`
// (presumably kept for backward compatibility -- confirm before removing).
#[doc(hidden)]
pub type Locations = CaptureLocations;
impl CaptureLocations {
/// Returns the `(start, end)` byte offsets recorded for capture group `i`,
/// or `None` if the underlying `Locations` has no position for that index.
#[inline]
pub fn get(&self, i: usize) -> Option<(usize, usize)> {
self.0.pos(i)
}
/// Returns the length reported by the underlying `Locations`.
#[inline]
pub fn len(&self) -> usize {
self.0.len()
}
// Hidden alias for `get`.
#[doc(hidden)]
#[inline]
pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
self.get(i)
}
}
/// The capture groups recorded for a single match.
///
/// Holds the searched haystack, the offsets of every group, and a shared
/// table mapping group names to group indices.
pub struct Captures<'t> {
    /// The haystack the group offsets index into.
    text: &'t [u8],
    /// Recorded offsets, one slot per capture group.
    locs: re_trait::Locations,
    /// Group name -> group index.
    named_groups: Arc<HashMap<String, usize>>,
}

impl<'t> Captures<'t> {
    /// Returns the match for capture group `i`, or `None` when no position
    /// is recorded for that index.
    pub fn get(&self, i: usize) -> Option<Match<'t>> {
        let haystack = self.text;
        self.locs
            .pos(i)
            .map(|(start, end)| Match::new(haystack, start, end))
    }

    /// Returns the match for the group called `name`, or `None` when the
    /// name is unknown or the group has no recorded position.
    pub fn name(&self, name: &str) -> Option<Match<'t>> {
        let index = match self.named_groups.get(name) {
            Some(&index) => index,
            None => return None,
        };
        self.get(index)
    }

    /// Returns an iterator over all groups in index order; each item is
    /// `None` for a group without a recorded position.
    pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
        let it = self.locs.iter();
        SubCaptureMatches { caps: self, it }
    }

    /// Expands `replacement` against these captures via `expand_bytes`,
    /// appending the result to `dst`.
    pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
        expand_bytes(self, replacement, dst)
    }

    /// Returns the number of group slots held by these captures.
    #[inline]
    pub fn len(&self) -> usize {
        self.locs.len()
    }
}
impl<'t> fmt::Debug for Captures<'t> {
    /// Formats as a `Captures(..)` tuple whose single field is the group
    /// map rendered by `CapturesDebug`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut tuple = f.debug_tuple("Captures");
        tuple.field(&CapturesDebug(self));
        tuple.finish()
    }
}
/// Helper that renders a `Captures` as a debug map of group -> matched
/// bytes.
struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);

impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
    /// Emits one map entry per group slot. The key is the group's name
    /// when it has one and its index otherwise; the value is the matched
    /// bytes escaped to printable ASCII, or `None` for an unset group.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Escapes every byte with `std::ascii::escape_default`, whose
        // output is ASCII, so each escaped byte maps directly to a char.
        fn escape_bytes(bytes: &[u8]) -> String {
            use std::ascii::escape_default;
            bytes
                .iter()
                .flat_map(|&b| escape_default(b))
                .map(|b| b as char)
                .collect()
        }

        let caps = self.0;
        // Invert the name -> index table so names can be found by slot.
        let slot_to_name: HashMap<usize, &str> = caps
            .named_groups
            .iter()
            .map(|(name, &slot)| (slot, name.as_str()))
            .collect();

        let mut map = f.debug_map();
        for (slot, span) in caps.locs.iter().enumerate() {
            let rendered = span.map(|(s, e)| escape_bytes(&caps.text[s..e]));
            match slot_to_name.get(&slot) {
                Some(name) => map.entry(name, &rendered),
                None => map.entry(&slot, &rendered),
            };
        }
        map.finish()
    }
}
impl<'t> Index<usize> for Captures<'t> {
    type Output = [u8];

    /// Returns the bytes matched by group `i`.
    ///
    /// # Panics
    ///
    /// Panics if `get(i)` returns `None`.
    fn index(&self, i: usize) -> &[u8] {
        match self.get(i) {
            Some(m) => m.as_bytes(),
            None => panic!("no group at index '{}'", i),
        }
    }
}
impl<'t, 'i> Index<&'i str> for Captures<'t> {
    type Output = [u8];

    /// Returns the bytes matched by the group called `name`.
    ///
    /// # Panics
    ///
    /// Panics if `self.name(name)` returns `None`.
    fn index<'a>(&'a self, name: &'i str) -> &'a [u8] {
        match self.name(name) {
            Some(m) => m.as_bytes(),
            None => panic!("no group named '{}'", name),
        }
    }
}
/// Iterator over every group of a `Captures`, in index order.
///
/// `'c` is the lifetime of the borrowed `Captures` and `'t` that of the
/// searched text.
pub struct SubCaptureMatches<'c, 't: 'c> {
    caps: &'c Captures<'t>,
    it: SubCapturesPosIter<'c>,
}

impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
    type Item = Option<Match<'t>>;

    /// Yields `Some(match)` for a group with a recorded position and
    /// `None` for one without.
    fn next(&mut self) -> Option<Option<Match<'t>>> {
        let haystack = self.caps.text;
        self.it.next().map(|slot| {
            slot.map(|(start, end)| Match::new(haystack, start, end))
        })
    }
}
/// Describes how to produce the replacement bytes for a match.
///
/// Implemented in this file for `&[u8]` templates, for closures taking
/// `&Captures`, for `NoExpand` and for `ReplacerRef`.
pub trait Replacer {
/// Appends the replacement for the match described by `caps` to `dst`.
fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>);
/// Returns the fixed replacement bytes when the replacement does not
/// depend on the captures, letting `Regex::replacen` skip capture
/// searches. The default returns `None`.
fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
None
}
/// Wraps a mutable borrow of this replacer in a `ReplacerRef`, which
/// implements `Replacer` by delegating back to it.
fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
ReplacerRef(self)
}
}
/// Adapter holding a mutable borrow of a replacer; returned by
/// `Replacer::by_ref`.
#[derive(Debug)]
pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
/// Delegates to the borrowed replacer's `replace_append`.
fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
self.0.replace_append(caps, dst)
}
/// Delegates to the borrowed replacer's `no_expansion`.
fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
self.0.no_expansion()
}
}
impl<'a> Replacer for &'a [u8] {
    /// Treats the slice as a replacement template and expands it against
    /// `caps` into `dst`.
    fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
        let template: &[u8] = *self;
        caps.expand(template, dst);
    }

    /// Returns the slice itself when it contains no `$` byte, since such a
    /// template has nothing to expand; returns `None` otherwise.
    fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
        let template: &[u8] = *self;
        if memchr(b'$', template).is_some() {
            None
        } else {
            Some(Cow::Borrowed(template))
        }
    }
}
impl<F, T> Replacer for F
where
    F: FnMut(&Captures) -> T,
    T: AsRef<[u8]>,
{
    /// Calls the closure with `caps` and appends the bytes it returns to
    /// `dst`.
    fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
        let replacement = (*self)(caps);
        dst.extend_from_slice(replacement.as_ref());
    }
}
/// Replacer that inserts its bytes literally, with no template expansion.
pub struct NoExpand<'t>(pub &'t [u8]);

impl<'t> Replacer for NoExpand<'t> {
    /// Appends the wrapped bytes to `dst` unchanged; the captures are
    /// ignored.
    fn replace_append(&mut self, _: &Captures, dst: &mut Vec<u8>) {
        let literal: &[u8] = self.0;
        dst.extend_from_slice(literal);
    }

    /// Always returns the wrapped bytes, as the replacement never varies.
    fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
        let literal: &[u8] = self.0;
        Some(Cow::Borrowed(literal))
    }
}