weighted_regexp/
lib.rs

1#![cfg_attr(test, feature(plugin))]
2#![cfg_attr(test, plugin(quickcheck_macros))]
3
4#[cfg(test)]
5extern crate quickcheck;
6
7extern crate num_traits;
8
9use num_traits::{Zero, zero, One, one};
10use std::ops::{Add, Mul};
11
12pub trait Regex<T, M> {
13    fn empty(&self) -> bool;
14    fn shift(&mut self, c : &T, mark : M) -> M;
15    fn reset(&mut self);
16}
17
18impl<T, M> Regex<T, M> for Box<Regex<T, M>> {
19    fn empty(&self) -> bool { self.as_ref().empty() }
20    fn shift(&mut self, c : &T, mark : M) -> M { self.as_mut().shift(c, mark) }
21    fn reset(&mut self) { self.as_mut().reset() }
22}
23
24pub struct Epsilon;
25
26impl<T, M: Zero> Regex<T, M> for Epsilon {
27    fn empty(&self) -> bool { true }
28    fn shift(&mut self, _c : &T, _mark : M) -> M { zero() }
29    fn reset(&mut self) { }
30}
31
32impl<T, M: Mul<Output=M>, F: Fn(&T) -> M> Regex<T, M> for F {
33    fn empty(&self) -> bool { false }
34    fn shift(&mut self, c : &T, mark : M) -> M {
35        mark * self(c)
36    }
37    fn reset(&mut self) { }
38}
39
40pub struct Alternative<L, R> {
41    left : L,
42    right : R,
43}
44
45impl<L, R> Alternative<L, R> {
46    pub fn new(left : L, right : R) -> Self
47    {
48        Alternative { left : left, right : right }
49    }
50}
51
52impl<T, M: Add<Output=M> + Clone, L, R> Regex<T, M> for Alternative<L, R> where L : Regex<T, M> + Sized, R : Regex<T, M> + Sized {
53    fn empty(&self) -> bool { self.left.empty() || self.right.empty() }
54    fn shift(&mut self, c : &T, mark : M) -> M {
55        self.left.shift(c, mark.clone()) + self.right.shift(c, mark)
56    }
57    fn reset(&mut self) {
58        self.left.reset();
59        self.right.reset();
60    }
61}
62
63pub struct Sequence<M, L, R> {
64    left : L,
65    right : R,
66    marked_left : M,
67}
68
69impl<M: Zero, L, R> Sequence<M, L, R> {
70    pub fn new(left : L, right : R) -> Self
71    {
72        Sequence { left : left, right : right, marked_left : zero() }
73    }
74}
75
76impl<T, M: Zero + Mul + Clone, L, R> Regex<T, M> for Sequence<M, L, R> where L : Regex<T, M> + Sized, R : Regex<T, M> + Sized {
77    fn empty(&self) -> bool { self.left.empty() && self.right.empty() }
78    fn shift(&mut self, c : &T, mark : M) -> M {
79        // Shift the new mark through the left child.
80        let marked_left = self.left.shift(c, mark.clone());
81
82        // Save the new mark that came out of the left child and get the
83        // previously-saved mark. The previous left mark is the one we
84        // feed into the right child.
85        let mut old_marked_left = marked_left.clone();
86        std::mem::swap(&mut self.marked_left, &mut old_marked_left);
87
88        // If the left child could match the empty string, then in
89        // addition to its previous mark, we also feed our new input
90        // mark to the right child.
91        if self.left.empty() {
92            old_marked_left = old_marked_left + mark;
93        }
94        let marked_right = self.right.shift(c, old_marked_left);
95
96        // Whatever the right child produced is our result, except if
97        // the right child could match the empty string, then the left
98        // child's result is included in the output too.
99        if self.right.empty() {
100            marked_left + marked_right
101        } else {
102            marked_right
103        }
104    }
105    fn reset(&mut self) {
106        self.left.reset();
107        self.right.reset();
108        self.marked_left = zero();
109    }
110}
111
112pub struct Repetition<M, R> {
113    re : R,
114    marked : M,
115}
116
117impl<M: Zero, R> Repetition<M, R> {
118    pub fn new(re : R) -> Self
119    {
120        Repetition { re : re, marked : zero() }
121    }
122}
123
124impl<T, M: Zero + Clone, R> Regex<T, M> for Repetition<M, R> where R : Regex<T, M> + Sized {
125    fn empty(&self) -> bool { true }
126    fn shift(&mut self, c : &T, mark : M) -> M {
127        self.marked = self.re.shift(c, mark + self.marked.clone());
128        self.marked.clone()
129    }
130    fn reset(&mut self) {
131        self.re.reset();
132        self.marked = zero();
133    }
134}
135
136pub fn match_regex<T, M, I>(re : &mut Regex<T, M>, over : I) -> M
137    where I: IntoIterator<Item=T>, M: Zero + One
138{
139    let mut iter = over.into_iter();
140    let mut result;
141    if let Some(c) = iter.next() {
142        result = re.shift(&c, one());
143    } else {
144        return if re.empty() { one() } else { zero() };
145    }
146    while let Some(c) = iter.next() {
147        result = re.shift(&c, zero());
148    }
149    re.reset();
150    return result;
151}
152
153#[derive(Copy, Clone)]
154pub struct Match(bool);
155
156impl Add for Match {
157    type Output = Match;
158    fn add(self, rhs : Match) -> Match { Match(self.0 || rhs.0) }
159}
160
161impl Zero for Match {
162    fn zero() -> Match { Match(false) }
163    fn is_zero(&self) -> bool { !self.0 }
164}
165
166impl Mul for Match {
167    type Output = Match;
168    fn mul(self, rhs : Match) -> Match { Match(self.0 && rhs.0) }
169}
170
171impl One for Match {
172    fn one() -> Match { Match(true) }
173}
174
175pub fn has_match<T, I>(re : &mut Regex<T, Match>, over : I) -> bool
176    where I: IntoIterator<Item=T>
177{
178    match_regex(re, over).0
179}
180
181#[cfg(test)]
182mod tests {
183    use super::*;
184
185    #[quickcheck]
186    fn epsilon_bool(to_match : Vec<bool>) -> bool {
187        to_match.is_empty() == has_match(&mut Epsilon, to_match)
188    }
189
190    #[quickcheck]
191    fn epsilon_char(to_match : String) -> bool {
192        to_match.is_empty() == has_match(&mut Epsilon, to_match.chars())
193    }
194
195    #[quickcheck]
196    fn fn_bool(to_match : Vec<bool>) -> bool {
197        ({
198            let mut iter = to_match.iter();
199            match (iter.next(), iter.next()) {
200                (Some(&expected), None) => expected,
201                _ => false,
202            }
203        }) == has_match(&mut |c: &bool| Match(*c), to_match)
204    }
205
206    #[quickcheck]
207    fn fn_char(to_match : String) -> bool {
208        ({
209            let mut iter = to_match.chars();
210            match (iter.next(), iter.next()) {
211                (Some(expected), None) => expected.is_uppercase(),
212                _ => false,
213            }
214        }) == has_match(&mut |c: &char| Match(c.is_uppercase()), to_match.chars())
215    }
216
217    #[quickcheck]
218    fn fn_any_bool(to_match : Vec<bool>) -> bool {
219        let mut re = |_: &bool| Match(true);
220        (to_match.len() == 1) == has_match(&mut re, to_match)
221    }
222
223    #[quickcheck]
224    fn fn_any_char(to_match : String) -> bool {
225        let mut re = |_: &char| Match(true);
226        (to_match.chars().count() == 1) == has_match(&mut re, to_match.chars())
227    }
228
229    #[quickcheck]
230    fn fn_none_bool(to_match : Vec<bool>) -> bool {
231        let mut re = |_: &bool| Match(false);
232        !has_match(&mut re, to_match)
233    }
234
235    #[quickcheck]
236    fn fn_none_char(to_match : String) -> bool {
237        let mut re = |_: &char| Match(false);
238        !has_match(&mut re, to_match.chars())
239    }
240
241    #[quickcheck]
242    fn alternative(to_match : String) -> bool {
243        let a = |c: &char| Match(*c == 'a');
244        let b = |c: &char| Match(*c == 'b');
245        ({
246            let mut iter = to_match.chars();
247            match (iter.next(), iter.next()) {
248                (Some(expected), None) => expected == 'a' || expected == 'b',
249                _ => false,
250            }
251        }) == has_match(&mut Alternative::new(a, b), to_match.chars())
252    }
253
254    #[quickcheck]
255    fn alternative_any_epsilon(to_match : String) -> bool {
256        let re = |_: &char| Match(true);
257        (to_match.chars().count() <= 1) ==
258            has_match(&mut Alternative::new(re, Epsilon), to_match.chars())
259    }
260
261    #[quickcheck]
262    fn alternative_epsilon_any(to_match : String) -> bool {
263        let re = |_: &char| Match(true);
264        (to_match.chars().count() <= 1) ==
265            has_match(&mut Alternative::new(Epsilon, re), to_match.chars())
266    }
267
268    #[quickcheck]
269    fn sequence_epsilon_left_identity(to_match : String) -> bool {
270        let mut re = |c: &char| Match(c.is_uppercase());
271        has_match(&mut Sequence::new(Epsilon, &re), to_match.chars()) ==
272            has_match(&mut re, to_match.chars())
273    }
274
275    #[quickcheck]
276    fn sequence_epsilon_right_identity(to_match : String) -> bool {
277        let mut re = |c: &char| Match(c.is_uppercase());
278        has_match(&mut Sequence::new(&re, Epsilon), to_match.chars()) ==
279            has_match(&mut re, to_match.chars())
280    }
281
282    #[quickcheck]
283    fn repeat_epsilon(to_match : String) -> bool {
284        to_match.is_empty() ==
285            has_match(&mut Repetition::new(Epsilon), to_match.chars())
286    }
287
288    #[quickcheck]
289    fn repeat_any(to_match : String) -> bool {
290        let re = |_: &char| Match(true);
291        has_match(&mut Repetition::new(re), to_match.chars())
292    }
293
294    #[quickcheck]
295    fn repeat_char(to_match : String) -> bool {
296        let re = |c: &char| Match(*c == 'A');
297        to_match.chars().all(|c| c == 'A') ==
298            has_match(&mut Repetition::new(re), to_match.chars())
299    }
300
301    #[quickcheck]
302    fn repeat_repeat_char(to_match : String) -> bool {
303        let re = |c: &char| Match(*c == 'A');
304        to_match.chars().all(|c| c == 'A') ==
305            has_match(&mut Repetition::new(Repetition::new(re)), to_match.chars())
306    }
307}