use self::StartsWithStateInternal::*;
pub trait Automaton {
type State;
fn start(&self) -> Self::State;
fn is_match(&self, state: &Self::State) -> bool;
fn can_match(&self, _state: &Self::State) -> bool {
true
}
fn will_always_match(&self, _state: &Self::State) -> bool {
false
}
fn accept(&self, state: &Self::State, byte: u8) -> Self::State;
fn starts_with(self) -> StartsWith<Self> where Self: Sized {
StartsWith(self)
}
fn union<Rhs: Automaton>(self, rhs: Rhs) -> Union<Self, Rhs> where Self: Sized {
Union(self, rhs)
}
fn intersection<Rhs: Automaton>(self, rhs: Rhs) -> Intersection<Self, Rhs> where Self: Sized {
Intersection(self, rhs)
}
fn complement(self) -> Complement<Self> where Self: Sized {
Complement(self)
}
}
impl<'a, T: Automaton> Automaton for &'a T {
type State = T::State;
fn start(&self) -> Self::State {
(*self).start()
}
fn is_match(&self, state: &Self::State) -> bool {
(*self).is_match(state)
}
fn can_match(&self, state: &Self::State) -> bool {
(*self).can_match(state)
}
fn will_always_match(&self, state: &Self::State) -> bool {
(*self).will_always_match(state)
}
fn accept(&self, state: &Self::State, byte: u8) -> Self::State {
(*self).accept(state, byte)
}
}
pub struct AlwaysMatch;
impl Automaton for AlwaysMatch {
type State = ();
fn start(&self) -> () { () }
fn is_match(&self, _: &()) -> bool { true }
fn can_match(&self, _: &()) -> bool { true }
fn will_always_match(&self, _: &()) -> bool { true }
fn accept(&self, _: &(), _: u8) -> () { () }
}
pub struct StartsWith<A>(A);
pub struct StartsWithState<A: Automaton>(StartsWithStateInternal<A>);
enum StartsWithStateInternal<A: Automaton> {
Done,
Running(A::State)
}
impl<A: Automaton> Automaton for StartsWith<A> {
type State = StartsWithState<A>;
fn start(&self) -> StartsWithState<A> {
StartsWithState({
let inner = self.0.start();
if self.0.is_match(&inner) {
Done
} else {
Running(inner)
}
})
}
fn is_match(&self, state: &StartsWithState<A>) -> bool {
match state.0 {
Done => true,
Running(_) => false
}
}
fn can_match(&self, state: &StartsWithState<A>) -> bool {
match state.0 {
Done => true,
Running(ref inner) => self.0.can_match(inner)
}
}
fn will_always_match(&self, state: &StartsWithState<A>) -> bool {
match state.0 {
Done => true,
Running(_) => false
}
}
fn accept(&self, state: &StartsWithState<A>, byte: u8) -> StartsWithState<A> {
StartsWithState(
match state.0 {
Done => Done,
Running(ref inner) => {
let next_inner = self.0.accept(inner, byte);
if self.0.is_match(&next_inner) {
Done
} else {
Running(next_inner)
}
}
}
)
}
}
pub struct Union<A, B>(A, B);
pub struct UnionState<A: Automaton, B: Automaton>(A::State, B::State);
impl<A: Automaton, B: Automaton> Automaton for Union<A, B> {
type State = UnionState<A, B>;
fn start(&self) -> UnionState<A, B> {
UnionState(
self.0.start(),
self.1.start()
)
}
fn is_match(&self, state: &UnionState<A, B>) -> bool {
self.0.is_match(&state.0) || self.1.is_match(&state.1)
}
fn can_match(&self, state: &UnionState<A, B>) -> bool {
self.0.can_match(&state.0) || self.1.can_match(&state.1)
}
fn will_always_match(&self, state: &UnionState<A, B>) -> bool {
self.0.will_always_match(&state.0) || self.1.will_always_match(&state.1)
}
fn accept(&self, state: &UnionState<A, B>, byte: u8) -> UnionState<A, B> {
UnionState(
self.0.accept(&state.0, byte),
self.1.accept(&state.1, byte)
)
}
}
pub struct Intersection<A, B>(A, B);
pub struct IntersectionState<A: Automaton, B: Automaton>(A::State, B::State);
impl<A: Automaton, B: Automaton> Automaton for Intersection<A, B> {
type State = IntersectionState<A, B>;
fn start(&self) -> IntersectionState<A, B> {
IntersectionState(
self.0.start(),
self.1.start()
)
}
fn is_match(&self, state: &IntersectionState<A, B>) -> bool {
self.0.is_match(&state.0) && self.1.is_match(&state.1)
}
fn can_match(&self, state: &IntersectionState<A, B>) -> bool {
self.0.can_match(&state.0) && self.1.can_match(&state.1)
}
fn will_always_match(&self, state: &IntersectionState<A, B>) -> bool {
self.0.will_always_match(&state.0) && self.1.will_always_match(&state.1)
}
fn accept(&self, state: &IntersectionState<A, B>, byte: u8) -> IntersectionState<A, B> {
IntersectionState(
self.0.accept(&state.0, byte),
self.1.accept(&state.1, byte)
)
}
}
pub struct Complement<A>(A);
pub struct ComplementState<A: Automaton>(A::State);
impl<A: Automaton> Automaton for Complement<A> {
type State = ComplementState<A>;
fn start(&self) -> ComplementState<A> {
ComplementState(self.0.start())
}
fn is_match(&self, state: &ComplementState<A>) -> bool {
!self.0.is_match(&state.0)
}
fn can_match(&self, state: &ComplementState<A>) -> bool {
!self.0.will_always_match(&state.0)
}
fn will_always_match(&self, state: &ComplementState<A>) -> bool {
!self.0.can_match(&state.0)
}
fn accept(&self, state: &ComplementState<A>, byte: u8) -> ComplementState<A> {
ComplementState(self.0.accept(&state.0, byte))
}
}
#[cfg(test)]
mod test {
use ::{IntoStreamer, Streamer, Levenshtein, Regex, Set, Automaton};
use set::OpBuilder;
static WORDS: &'static str = include_str!("../data/words-10000");
fn get_set() -> Set {
Set::from_iter(WORDS.lines()).unwrap()
}
#[test]
fn complement_small() {
let keys = vec!["fa", "fo", "fob", "focus", "foo", "food", "foul"];
let set = Set::from_iter(keys).unwrap();
let lev = Levenshtein::new("foo", 1).unwrap();
let stream = set.search(lev.complement()).into_stream();
let keys = stream.into_strs().unwrap();
assert_eq!(keys, vec!["fa", "focus", "foul"]);
}
#[test]
fn startswith_small() {
let keys = vec!["", "cooing", "fa", "fo", "fob", "focus", "foo", "food", "foul", "fritter", "frothing"];
let set = Set::from_iter(keys).unwrap();
let lev = Levenshtein::new("foo", 1).unwrap();
let stream = set.search(lev.starts_with()).into_stream();
let keys = stream.into_strs().unwrap();
assert_eq!(keys, vec!["cooing", "fo", "fob", "focus", "foo", "food", "foul", "frothing"]);
}
#[test]
fn intersection_small() {
let keys = vec!["fa", "fo", "fob", "focus", "foo", "food", "foul"];
let set = Set::from_iter(keys).unwrap();
let lev = Levenshtein::new("foo", 1).unwrap();
let reg = Regex::new("(..)*").unwrap();
let stream = set.search(lev.intersection(reg)).into_stream();
let keys = stream.into_strs().unwrap();
assert_eq!(keys, vec!["fo", "food"]);
}
#[test]
fn union_small() {
let keys = vec!["fa", "fo", "fob", "focus", "foo", "food", "foul"];
let set = Set::from_iter(keys).unwrap();
let lev = Levenshtein::new("foo", 1).unwrap();
let reg = Regex::new("(..)*").unwrap();
let stream = set.search(lev.union(reg)).into_stream();
let keys = stream.into_strs().unwrap();
assert_eq!(keys, vec!["fa", "fo", "fob", "foo", "food", "foul"]);
}
#[test]
fn intersection_large() {
let set = get_set();
let lev = Levenshtein::new("foo", 3).unwrap();
let reg = Regex::new("(..)*").unwrap();
let mut stream1 = set.search((&lev).intersection(®)).into_stream();
let mut stream2 = OpBuilder::new().add(set.search(&lev)).add(set.search(®)).intersection();
while let Some(key1) = stream1.next() {
assert_eq!(stream2.next(), Some(key1));
}
assert_eq!(stream2.next(), None);
}
#[test]
fn union_large() {
let set = get_set();
let lev = Levenshtein::new("foo", 3).unwrap();
let reg = Regex::new("(..)*").unwrap();
let mut stream1 = set.search((&lev).union(®)).into_stream();
let mut stream2 = OpBuilder::new().add(set.search(&lev)).add(set.search(®)).union();
while let Some(key1) = stream1.next() {
assert_eq!(stream2.next(), Some(key1));
}
assert_eq!(stream2.next(), None);
}
}