use std::ops::Range;
use std::str::FromStr;
use std::sync::Arc;
use std::vec;
use crate::{
common::BlockDatabase,
compile::{Builder, Flags, Pattern},
runtime::Matching,
Error, Result,
};
/// A single match located inside a haystack.
///
/// The value keeps a borrow of the whole haystack plus the byte offsets of
/// the matched span, so the matched text can be recovered without copying.
/// `'t` is the lifetime of the haystack.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Match<'t> {
    /// The haystack that `start` and `end` index into.
    text: &'t str,
    /// Byte offset at which the match begins.
    start: usize,
    /// Byte offset at which the match ends (exclusive).
    end: usize,
}

impl<'t> Match<'t> {
    /// Builds a match covering `start..end` of `haystack`.
    ///
    /// The offsets are stored as given; they are not validated here.
    #[inline]
    fn new(haystack: &'t str, start: usize, end: usize) -> Self {
        Self {
            text: haystack,
            start,
            end,
        }
    }

    /// Returns the byte offset at which the match begins.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the byte offset at which the match ends (exclusive).
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }

    /// Returns the matched span as a half-open byte range `start..end`.
    #[inline]
    pub fn range(&self) -> Range<usize> {
        Range {
            start: self.start,
            end: self.end,
        }
    }

    /// Returns the matched text, borrowed from the haystack.
    ///
    /// # Panics
    ///
    /// Panics if the stored offsets are out of bounds for the haystack or do
    /// not fall on UTF-8 character boundaries.
    #[inline]
    pub fn as_str(&self) -> &'t str {
        &self.text[self.range()]
    }
}
impl<'t> From<Match<'t>> for &'t str {
fn from(m: Match<'t>) -> &'t str {
m.as_str()
}
}
impl<'t> From<Match<'t>> for Range<usize> {
fn from(m: Match<'t>) -> Range<usize> {
m.range()
}
}
/// An iterator over the matches found in a haystack.
///
/// Field `0` is the haystack; field `1` owns the byte ranges of the matches
/// and hands them out one at a time.
pub struct Matches<'t>(&'t str, vec::IntoIter<Range<usize>>);

impl<'t> Matches<'t> {
    /// Returns the haystack this iterator reports matches for.
    pub fn text(&self) -> &'t str {
        let Matches(haystack, _) = self;
        *haystack
    }
}
impl<'t> Iterator for Matches<'t> {
type Item = Match<'t>;
fn next(&mut self) -> Option<Self::Item> {
self.1.next().map(|range| Match::new(self.0, range.start, range.end))
}
}
impl<'t> DoubleEndedIterator for Matches<'t> {
fn next_back(&mut self) -> Option<Self::Item> {
self.1
.next_back()
.map(|range| Match::new(self.0, range.start, range.end))
}
}
/// A compiled regular expression.
///
/// Wraps the built [`BlockDatabase`] in an [`Arc`], so cloning a `Regex`
/// clones the `Arc` handle rather than the database itself.
#[derive(Clone)]
pub struct Regex(pub(crate) Arc<BlockDatabase>);
/// Lets a pattern string be turned into a [`Regex`] with `str::parse`.
impl FromStr for Regex {
    type Err = Error;

    /// Compiles `pattern` exactly as [`Regex::new`] does.
    fn from_str(pattern: &str) -> Result<Self> {
        Self::new(pattern)
    }
}
impl Regex {
pub fn new<S: Into<String>>(re: S) -> Result<Regex> {
Self::with_flags(re, Flags::empty())
}
pub(crate) fn with_flags<S: Into<String>>(re: S, flags: Flags) -> Result<Regex> {
Pattern::with_flags(re, flags | Flags::SOM_LEFTMOST | Flags::UTF8)?
.build()
.map(|db| Regex(Arc::new(db)))
}
pub fn is_match(&self, text: &str) -> bool {
let mut matched = false;
let s = self.0.alloc_scratch().unwrap();
let _ = self.0.scan(text, &s, |_, _, _, _| {
matched = true;
Matching::Terminate
});
matched
}
pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> {
let mut matched = vec![];
let s = self.0.alloc_scratch().unwrap();
let _ = self.0.scan(text, &s, |_, from, to, _| {
matched.push((from as usize, to as usize));
Matching::Terminate
});
matched
.first()
.map(|&(start, end)| Match::new(&text[start..end], start, end))
}
pub fn find_iter<'t>(&self, text: &'t str) -> Matches<'t> {
let mut matched = Vec::<Range<usize>>::new();
let s = self.0.alloc_scratch().unwrap();
let _ = self.0.scan(text, &s, |_, from, to, _| {
let range = from as usize..to as usize;
match matched.last() {
Some(last) if last.start == range.start && last.end < range.end => {
*matched.last_mut().unwrap() = range;
}
_ => matched.push(range),
}
Matching::Continue
});
Matches(text, matched.into_iter())
}
pub fn split<'t>(&self, text: &'t str) -> Split<'t> {
Split {
finder: self.find_iter(text),
last: 0,
}
}
pub fn splitn<'t>(&self, text: &'t str, limit: usize) -> SplitN<'t> {
SplitN {
splits: self.split(text),
n: limit,
}
}
}
/// An iterator over the pieces of a haystack that lie between matches.
pub struct Split<'t> {
    /// Source of the matches that act as delimiters.
    finder: Matches<'t>,
    /// Byte offset where the next piece begins. Set to `text.len() + 1` once
    /// the final piece has been returned, which marks the iterator as finished.
    last: usize,
}

impl<'t> Iterator for Split<'t> {
    type Item = &'t str;

    /// Returns the text between the previous delimiter and the next one, or
    /// the trailing remainder once the delimiters run out.
    ///
    /// A match that starts before the end of the previously consumed match
    /// (i.e. overlaps it) is skipped rather than used as a delimiter, since
    /// slicing backwards would panic.
    fn next(&mut self) -> Option<&'t str> {
        let text = self.finder.text();
        let last = self.last;
        // Take the next delimiter that does not reach back into text that has
        // already been consumed.
        match self.finder.find(|m| m.start() >= last) {
            Some(m) => {
                let piece = &text[last..m.start()];
                self.last = m.end();
                Some(piece)
            }
            // The trailing remainder has already been handed out.
            None if last > text.len() => None,
            None => {
                // Push `last` past the end so the next call returns `None`.
                self.last = text.len() + 1;
                Some(&text[last..])
            }
        }
    }
}
/// An iterator over at most `n` pieces of a haystack split on matches.
pub struct SplitN<'t> {
    /// The underlying unbounded splitter.
    splits: Split<'t>,
    /// How many more pieces may still be yielded.
    n: usize,
}

impl<'t> Iterator for SplitN<'t> {
    type Item = &'t str;

    /// Yields ordinary split pieces while more than one piece is still
    /// allowed; the last allowed piece is everything that has not been
    /// consumed yet, left unsplit.
    fn next(&mut self) -> Option<&'t str> {
        match self.n {
            // The budget is used up.
            0 => None,
            // Final piece: hand back the untouched remainder, if any is left.
            1 => {
                self.n = 0;
                let text = self.splits.finder.text();
                let from = self.splits.last;
                if from <= text.len() {
                    Some(&text[from..])
                } else {
                    // The splitter has already returned every piece.
                    None
                }
            }
            _ => {
                self.n -= 1;
                self.splits.next()
            }
        }
    }
}
#[cfg(test)]
mod tests {
    /// Checks that `find_iter` reports the same match ranges as the `regex`
    /// crate for a word-boundary pattern.
    #[test]
    fn test_find_iter() {
        let pattern = r"\b\w{13}\b";
        let text = "Retroactively relinquishing remunerations is reprehensible.";

        // Reference result from the `regex` crate.
        let reference = regex::Regex::new(pattern).unwrap();
        let expected: Vec<_> = reference.find_iter(text).map(|m| m.range()).collect();

        // Result from this crate's implementation.
        let subject = super::Regex::new(pattern).unwrap();
        let actual: Vec<_> = subject.find_iter(text).map(|m| m.range()).collect();

        assert_eq!(expected, actual);
    }
}