1
2use std::rc::Rc;
3use std::cmp::Ordering;
4use std::collections::{HashMap,BTreeSet};
5use std::hash::Hash;
6
7
8#[derive(Clone)]
9pub enum RegEx<T:Ord>
10{
11 EmptySet,EmptyString,Single(T),
14 Disjunction(Vec<Rc<RegEx<T>>>),
15 Concatenation(Vec<Rc<RegEx<T>>>),
16 KleeneStar(Rc<RegEx<T>>),
17 Complement(Vec<T>),}
19
20use RegEx::*;
21
22impl<T:Ord> Ord for RegEx<T>
23{
24 fn cmp(&self,other:&Self) -> Ordering
25 {
26 match self
29 {
30 &EmptySet => match other
31 {
32 &EmptySet => Ordering::Equal,
33 _ => Ordering::Less,
34 }
35 &EmptyString => match other
36 {
37 &EmptySet => Ordering::Greater,
38 &EmptyString => Ordering::Equal,
39 _ => Ordering::Less,
40 },
41 &Single(ref lit) => match other
42 {
43 &EmptySet => Ordering::Greater,
44 &EmptyString => Ordering::Greater,
45 &Single(ref lit_other) => lit.cmp(lit_other),
46 _ => Ordering::Less,
47 },
48 &Disjunction(ref v) => match other
49 {
50 &EmptySet => Ordering::Greater,
51 &EmptyString => Ordering::Greater,
52 &Single(_) => Ordering::Greater,
53 &Disjunction(ref v_other) => v.cmp(v_other),
54 _ => Ordering::Less,
55 },
56 &Concatenation(ref v) => match other
57 {
58 &EmptySet => Ordering::Greater,
59 &EmptyString => Ordering::Greater,
60 &Single(_) => Ordering::Greater,
61 &Disjunction(_) => Ordering::Greater,
62 &Concatenation(ref v_other) => v.cmp(v_other),
63 _ => Ordering::Less,
64 },
65 &KleeneStar(ref e) => match other
66 {
67 &EmptySet => Ordering::Greater,
68 &EmptyString => Ordering::Greater,
69 &Single(_) => Ordering::Greater,
70 &Disjunction(_) => Ordering::Greater,
71 &Concatenation(_) => Ordering::Greater,
72 &KleeneStar(ref e_other) => e.cmp(e_other),
73 _ => Ordering::Less,
74 },
75 &Complement(ref e) => match other
76 {
77 &EmptySet => Ordering::Greater,
78 &EmptyString => Ordering::Greater,
79 &Single(_) => Ordering::Greater,
80 &Disjunction(_) => Ordering::Greater,
81 &Concatenation(_) => Ordering::Greater,
82 &KleeneStar(_) => Ordering::Greater,
83 &Complement(ref e_other) => e.cmp(e_other),
84 },
85 }
86 }
87}
88
89impl<T:Ord> PartialOrd for RegEx<T>
90{
91 fn partial_cmp(&self,other:&Self) -> Option<Ordering>
92 {
93 Some(self.cmp(other))
96 }
97}
98
99impl<T:Ord> PartialEq for RegEx<T>
100{
101 fn eq(&self, other:&Self) -> bool
102 {
103 self.cmp(other)==Ordering::Equal
106 }
107}
108
109impl<T:Ord> Eq for RegEx<T> {}
110
111impl<T:Ord+Clone> RegEx<T>
112{
113 fn normal_form(&self) -> Self
114 {
115 match self
116 {
117 &EmptySet => EmptySet,
118 &EmptyString => EmptyString,
119 &Single(ref lit) => Single(lit.clone()),
120 &Disjunction(ref v) =>
121 {
122 let mut w:Vec<RegEx<T>>=v.iter().map(|e|e.normal_form()).filter(|e|*e!=EmptySet).collect();
123 let mut index=0;
124 while index<w.len()
125 {
126 if let Disjunction(ref rc_sub)=w[index].clone()
127 {
128 w.remove(index);
129 let mut sub:Vec<RegEx<T>>=rc_sub.iter().map(|e|Rc::as_ref(e).clone()).collect();
130 w.append(&mut sub);
131 }
132 else
133 {
134 index+=1;
135 }
136 }
137 w.sort();
138 let n=w.len();
139 if n==0
140 {
141 EmptySet
142 }
143 else if n==1
144 {
145 w[0].clone()
146 }
147 else
148 {
149 Disjunction(w.iter().map(|e|Rc::new(e.clone())).collect())
150 }
151 },
152 &Concatenation(ref v) =>
153 {
154 let mut w:Vec<RegEx<T>>=v.iter().map(|e|e.normal_form()).filter(|e|*e!=EmptyString).collect();
155 if w.contains(&EmptySet)
156 {
157 EmptySet
158 }
159 else
160 {
161 let n=w.len();
162 if n==0
163 {
164 EmptyString
165 }
166 else if n==1
167 {
168 w[0].clone()
169 }
170 else
171 {
172 Concatenation(w.iter().map(|e|Rc::new(e.clone())).collect())
174 }
175 }
176 }
177 &KleeneStar(ref e) => KleeneStar(Rc::new(e.normal_form())),
178 &Complement(ref v) => Complement(v.clone()),
179 }
180 }
181 #[allow(dead_code)]
182 fn to_string(&self, t_to_string:&Fn(&T)->String )->String
183 {
184 match self
185 {
186 &EmptySet => String::from("@emptyset"),
187 &EmptyString => String::from(""),
188 &Single(ref lit) => t_to_string(&lit),
189 &Disjunction(ref v) =>
190 {
191 let mut r=String::from("(");
192 r+=&v.iter().map(|elem|
193 Rc::as_ref(elem).to_string(t_to_string)
194 ).collect::<Vec<String>>().join("|");
195 r.push(')');
196 r
197 },
198 &Concatenation(ref v) =>
199 {
200 let mut r=String::from("(");
201 r+=&v.iter().map(|elem|
202 Rc::as_ref(elem).to_string(t_to_string)
203 ).collect::<Vec<String>>().join(")(");
204 r.push(')');
205 r
206 },
207 &KleeneStar(ref e) => String::from("(")+&Rc::as_ref(&e).to_string(t_to_string)+&String::from(")*"),
208 &Complement(ref v) => String::from("[^")+&v.iter().map(t_to_string).collect::<String>()+&String::from("]"),
210 }
211 }
212 fn contains_empty(&self) -> bool
213 {
214 match self
215 {
216 &EmptySet => false,
217 &EmptyString => true,
218 &Single(_) => false,
219 &Disjunction(ref v) =>
220 {
221 for e in v
222 {
223 if e.contains_empty()
224 {
225 return true;
226 }
227 }
228 false
229 },
230 &Concatenation(ref v) =>
231 {
232 for e in v
233 {
234 if !e.contains_empty()
235 {
236 return false;
237 }
238 }
239 true
240 },
241 &KleeneStar(_) => true,
242 &Complement(_) => false,
243 }
244 }
245 fn simple_derivative(&self, argument:&T) -> Self
246 {
247 match self
248 {
249 &EmptySet => EmptySet,
250 &EmptyString => EmptySet,
251 &Single(ref lit) => if *lit==*argument { EmptyString } else { EmptySet },
252 &Disjunction(ref v) => Disjunction(v.iter().map(|x|Rc::new(x.simple_derivative(argument))).collect()),
253 &Concatenation(ref v) =>
254 {
255 let mut disj=vec![];
267 let mut w=v.clone();
268 while w.len()>0
269 {
270 let follow=w[0].contains_empty();
271 w[0]=Rc::new(w[0].simple_derivative(argument));
272 disj.push(Rc::new(Concatenation(w.clone())));
273 w.remove(0);
274 if !follow
275 {
276 break;
277 }
278 }
279 Disjunction(disj)
280 },
281 &KleeneStar(ref e) => Concatenation(vec![Rc::new(e.simple_derivative(argument)),Rc::new(KleeneStar(e.clone()))]),
282 &Complement(ref v) =>
283 {
284 for e in v
285 {
286 if *e==*argument
287 {
288 return EmptySet;
289 }
290 }
291 EmptyString
292 },
293 }
294 }
295 fn derivative(&self, argument:&T) -> Self
296 {
297 self.simple_derivative(argument).normal_form()
298 }
299 pub fn map<U:Ord>(&self,f:&Fn(&T)->U) -> RegEx<U>
300 {
301 match self
302 {
303 &EmptySet => EmptySet,
304 &EmptyString => EmptyString,
305 &Single(ref lit) => Single(f(lit)),
306 &Disjunction(ref v) => Disjunction(v.iter().map(|x|Rc::new(x.map(f))).collect()),
307 &Concatenation(ref v) => Concatenation(v.iter().map(|x|Rc::new(x.map(f))).collect()),
308 &KleeneStar(ref e) => KleeneStar(Rc::new(e.map(f))),
309 &Complement(ref v) => Complement(v.iter().map(f).collect()),
310 }
311 }
312 fn firsts(&self, all:&Vec<T>) -> BTreeSet<T>
313 {
314 let mut r:BTreeSet<T>=BTreeSet::new();
315 match self
316 {
317 &EmptySet => (),
318 &EmptyString => (),
319 &Single(ref lit) =>
320 {
321 r.insert(lit.clone());
322 },
323 &Disjunction(ref v) =>
324 {
325 for e in v
326 {
327 let mut f=e.firsts(all);
328 r.append(&mut f);
329 }
330 },
331 &Concatenation(ref v) =>
332 {
333 for e in v
334 {
335 let mut f=e.firsts(all);
336 r.append(&mut f);
337 if !e.contains_empty()
338 {
339 break;
340 }
341 }
342 },
343 &KleeneStar(ref e) =>
344 {
345 let mut f=e.firsts(all);
346 r.append(&mut f);
347 },
348 &Complement(ref v) =>
349 {
350 for lit in all
351 {
352 if !v.contains(lit)
353 {
354 r.insert(lit.clone());
355 }
356 }
357 },
358 };
359 r
360 }
361}
362
363fn concatenate<T:Ord+'static>(v:Vec<Rc<RegEx<T>>>) -> Rc<RegEx<T>>
364{
365 let n=v.len();
366 if n==0
367 {
368 Rc::new(EmptyString)
369 }
370 else if n==1
371 {
372 v[0].clone()
373 }
374 else
375 {
376 Rc::new(Concatenation(v))
377 }
378}
379
380fn disjunctive<T:Ord+'static>(v:Vec<Rc<RegEx<T>>>) -> Rc<RegEx<T>>
381{
382 let mut w=v.clone();
383 w.sort();
384 let n=w.len();
385 if n==0
386 {
387 Rc::new(EmptySet)
388 }
389 else if n==1
390 {
391 w[0].clone()
392 }
393 else
394 {
395 Rc::new(Disjunction(w))
396 }
397}
398
399#[derive(Clone)]
400enum RegexCharStackElem
401{
402 Start,
403 End,
404 Bar,
405 ProcessedBar,
406 Open,
407 Close,
408 Expression(Rc<RegEx<char>>),
409}
410
411pub fn regex(source:&str) -> Rc<RegEx<char>>
413{
414 let mut it=source.chars();
420 let mut stack=vec![RegexCharStackElem::Start];
421 loop
422 {
423 loop
425 {
426 match stack.pop().unwrap()
428 {
429 RegexCharStackElem::Start =>
430 {
431 stack.push(RegexCharStackElem::Start);
432 break;
433 }
434 RegexCharStackElem::End =>
435 {
436 let mut i=stack.len()-1;
438 while i>=1
439 {
440 if let RegexCharStackElem::Expression(_)=stack[i]
441 {
442 i-=1;
443 }
444 else
445 {
446 break;
447 }
448 }
449 i+=1;
450 if i==stack.len()
451 {
452 let new=disjunctive(
456 stack.iter().filter(|&x|match x
457 {
458 &RegexCharStackElem::Expression(_) => true,
459 _ => false,
460 }).map(|x|match x
461 {
462 &RegexCharStackElem::Expression(ref e) => e.clone(),
463 _ => panic!("Ending"),
464 }).collect());
465 return new;
466 }
467 let new=concatenate(
469 stack[i..].iter().map(|x|match x
470 {
471 &RegexCharStackElem::Expression(ref e) => e.clone(),
472 _ => panic!("Ending"),
473 }).collect());
474 stack.resize(i,RegexCharStackElem::Start);
475 stack.push(RegexCharStackElem::Expression(new));
476 stack.push(RegexCharStackElem::ProcessedBar);
477 stack.push(RegexCharStackElem::End);
478 },
479 RegexCharStackElem::Bar =>
480 {
481 let mut i=stack.len()-1;
483 while i>=1
484 {
485 if let RegexCharStackElem::Expression(_)=stack[i]
486 {
487 i-=1;
488 }
489 else
490 {
491 break;
492 }
493 }
494 i+=1;
495 let new=concatenate(
497 stack[i..].iter().map(|x|match x
498 {
499 &RegexCharStackElem::Expression(ref e) => e.clone(),
500 _ => panic!("Bar"),
501 }).collect());
502 stack.resize(i,RegexCharStackElem::Start);
503 stack.push(RegexCharStackElem::Expression(new));
504 stack.push(RegexCharStackElem::ProcessedBar);
505 }
506 RegexCharStackElem::ProcessedBar =>
507 {
508 stack.push(RegexCharStackElem::ProcessedBar);
509 break;
510 },
511 RegexCharStackElem::Open =>
512 {
513 stack.push(RegexCharStackElem::Open);
514 break;
515 },
516 RegexCharStackElem::Close =>
517 {
518 let mut i=stack.len()-1;
520 while i>=1
521 {
522 if let RegexCharStackElem::Expression(_)=stack[i]
523 {
524 i-=1;
525 }
526 else
527 {
528 break;
529 }
530 }
531 i+=1;
532 if i==stack.len()
533 {
534 i-=1;
537 while i>=1
538 {
539 if let RegexCharStackElem::Open=stack[i]
540 {
541 break;
542 }
543 else
544 {
545 i-=1;
546 }
547 }
548 let new=disjunctive(
560 stack[i..].iter().filter(|&x|match x
561 {
562 &RegexCharStackElem::Expression(_) => true,
563 _ => false,
564 }).map(|x|match x
565 {
566 &RegexCharStackElem::Expression(ref e) => e.clone(),
567 _ => panic!("Close"),
568 }).collect());
569 stack.resize(i,RegexCharStackElem::Start);
570 stack.push(RegexCharStackElem::Expression(new));
572 }
573 else
574 {
575 let new=concatenate(
577 stack[i..].iter().map(|x|match x
578 {
579 &RegexCharStackElem::Expression(ref e) => e.clone(),
580 _ => panic!("Close"),
581 }).collect());
582 stack.resize(i,RegexCharStackElem::Start);
583 stack.push(RegexCharStackElem::Expression(new));
584 stack.push(RegexCharStackElem::ProcessedBar);
585 stack.push(RegexCharStackElem::Close);
586 }
587 },
588 RegexCharStackElem::Expression(exp) =>
589 {
590 stack.push(RegexCharStackElem::Expression(exp));
591 break;
592 },
593 };
594 }
595 match it.next()
596 {
597 None =>
598 {
599 stack.push(RegexCharStackElem::End);
600 },
601 Some(mut c) =>
602 if c=='['
603 {
604 let mut lit=it.next().unwrap();
606 let complemented= lit=='^';
607 if complemented
608 {
609 lit=it.next().unwrap();
610 }
611 let mut v:Vec<char>=vec![];
613 while lit!=']'
614 {
615 v.push(lit);
618 lit=it.next().unwrap();
619 }
620 if complemented
623 {
624 stack.push(RegexCharStackElem::Expression(Rc::new(Complement(v))));
627 }
628 else
629 {
630 stack.push(RegexCharStackElem::Expression(disjunctive(v.iter().map(|&l|Rc::new(Single(l))).collect())));
632 }
633 }
634 else if c=='|'
635 {
636 stack.push(RegexCharStackElem::Bar);
637 }
638 else if c=='('
639 {
640 stack.push(RegexCharStackElem::Open);
641 }
642 else if c==')'
643 {
644 stack.push(RegexCharStackElem::Close);
645 }
646 else if c=='*'
647 {
648 match stack.pop().unwrap()
650 {
651 RegexCharStackElem::Expression(exp) => stack.push(RegexCharStackElem::Expression(Rc::new(KleeneStar(exp)))),
653 _ => panic!("Nothing to star"),
654 };
655 }
656 else
657 {
658 if c=='\\'
660 {
661 c=it.next().unwrap();
662 }
663 stack.push(RegexCharStackElem::Expression(Rc::new(Single(c))));
666 },
667 };
668 }
669}
670
671#[derive(Debug,Clone)]
680pub enum Action
681{
682 Flush,
683 Keep(Rc<Vec<bool>>),
685 Match(usize),
686}
687
688pub trait StateMachine
689{
690 type Item;
691 fn trans(&self, state:usize, elem:Self::Item) -> (usize,Action);
692 fn zero_state_may_be_first(&self, elem:Self::Item) -> bool;fn replace_all<I:Iterator<Item=Self::Item>+Clone, J:Iterator<Item=Self::Item>+Clone>(&self, source:I, replacement:J) -> ReplacerIt<Self::Item,Self,I,J>
697 {
698 ReplacerIt
700 {
701 machine:self,
702 source: source.clone(),
703 replacement,
704 state:State::start(),
705 flushing: 0,
707 source_flush: source.clone(),
708 insert: None,
709 }
710 }
711 fn replace_vec_all<'a,J:Iterator<Item=Self::Item>+Clone>(&'a self, source:&'a Vec<Self::Item>, replacement:J) -> ReplacerVecIt<'a,Self::Item,Self,J>
712 {
713 ReplacerVecIt
714 {
715 machine:self,
716 source,
717 source_index: 0,
718 replacement,
719 state:State::start(),
720 flushing: 0,
721 source_flush: 0,
722 insert: None,
723 }
724 }
725 fn find_all<I:Iterator<Item=Self::Item>+Clone>(&self, source:I) -> FinderIt<Self::Item,Self,I>
726 {
727 FinderIt
728 {
729 machine:self,
730 source: source.clone(),
731 state:State::start(),
732 source_flush: source.clone(),
734 }
735 }
736 fn find_vec_all<'a>(&'a self, source:&'a Vec<Self::Item>) -> FinderVecIt<'a,Self::Item,Self>
737 {
738 FinderVecIt
739 {
740 machine:self,
741 source,
742 source_index: 0,
743 state:State::start(),
744 source_flush: 0,
745 }
746 }
747}
748
749pub struct StateMachineFunction<'a,T:'a>
750{
751 transition: &'a Fn(usize,T)->(usize,Action),}
753
754impl<'a,T> StateMachine for StateMachineFunction<'a,T>
755{
756 type Item=T;
757 fn trans(&self, state:usize, elem:T) -> (usize,Action)
758 {
759 (self.transition)(state,elem)
760 }
761 fn zero_state_may_be_first(&self, _elem:Self::Item) -> bool
762 {
763 true
764 }
765}
766
767struct State
769{
770 index: usize,
771 kept: Vec<usize>
773}
774
775pub struct ReplacerIt<
776 'a,
777 T:'a,
778 S: StateMachine<Item=T>+?Sized+'a,
780 I: 'a+Iterator<Item=T>,
781 J: 'a+Iterator<Item=T>,
782 >
784{
785 machine: &'a S,
786 source: I,
787 replacement: J,
788 state: State,
790 flushing: usize,
792 source_flush: I,
793 insert: Option<J>,
794}
795
796impl State
798{
799 fn start() -> State
801 {
802 State{
803 index:0,
804 kept:vec![0],
806 }
807 }
808}
809
810impl<'a,T:'a+Clone,S:StateMachine<Item=T>+'a,I,J> Iterator for ReplacerIt<'a,T,S,I,J> where I:Iterator<Item=T>+Clone, J:Iterator<Item=T>+Clone
813{
814 type Item=T;
815 fn next(&mut self) -> Option<T>
816 {
817 if self.flushing>0
819 {
820 self.flushing-=1;
821 return self.source_flush.next();
823 }
824 if let Some(ref mut ins)=self.insert
825 {
826 if let Some(e)=ins.next()
827 {
828 return Some(e);
830 }
831 self.source_flush=self.source.clone();
832 }
833 self.insert=None;
834 loop
835 {
836 let mut elem;
837 match self.source.next()
838 {
839 None => return self.source_flush.next(),
840 Some(e) =>
841 {
842 elem=e;
843 if self.state.index==0 && self.state.kept[0]==0
844 {
845 while !self.machine.zero_state_may_be_first(elem.clone())
847 {
848 self.flushing+=1;
849 match self.source.next()
850 {
851 None =>
852 {
853 return self.source_flush.next();
854 },
855 Some(e) => elem=e,
856 };
857 }
858 }
859 let (news,action)=self.machine.trans(self.state.index,elem);
861 self.state.index=news;
862 match action
864 {
865 Action::Keep(v) =>
866 {
867 let nkept=self.state.kept.len();
868 let added_cand=v.len()>nkept;
869 let amount=self.state.kept[0];
870 let mut i=0;
871 for j in 0..nkept
872 {
873 if v[j]
874 {
875 self.state.kept[i]=self.state.kept[j]+1;
876 i+=1;
877 }
878 }
879 self.state.kept.resize(i,0);
880 let new_amount=self.state.kept[0];
881 if added_cand
882 {
883 self.state.kept.push(if v[v.len()-1] {1} else {0});
884 }
885 if new_amount<amount+1
886 {
887 self.flushing+=amount-new_amount;
888 return self.source_flush.next();
890 }
891 },
892 Action::Flush =>
893 {
894 self.flushing+=self.state.kept[0];
901 self.state.kept.clear();
903 self.state.kept.push(0);
904 return self.source_flush.next();
905 },
906 Action::Match(cand) =>
907 {
908 let mut ins=self.replacement.clone();
910 self.flushing+=self.state.kept[0]-self.state.kept[cand];
911 self.state.kept.clear();
913 self.state.kept.push(0);
914 if self.flushing>0
915 {
916 self.insert=Some(ins);
917 self.flushing-=1;
919 return self.source_flush.next();
920 }
921 else
922 {
923 if let Some(e)=ins.next()
925 {
926 self.insert=Some(ins);
927 return Some(e);
928 }
929 else
930 {
931 if self.flushing>0
932 {
933 self.insert=Some(ins);
934 self.flushing-=1;
935 return self.source_flush.next();
936 }
937 else
938 {
939 self.source_flush=self.source.clone();
940 }
941 }
942 }
943 },
944 };
945 }
946 };
947 }
948 }
949}
950
951pub struct ReplacerVecIt<
952 'a,
953 T:'a,
954 S: StateMachine<Item=T>+?Sized+'a,
956 J: 'a+Iterator<Item=T>,
957 >
958{
959 machine: &'a S,
960 source: &'a Vec<T>,
961 source_index: usize,
962 replacement: J,
963 state: State,
964 flushing: usize,
965 source_flush: usize,
966 insert: Option<J>,
967}
968
969impl<'a,T:'a+Clone,S:StateMachine<Item=T>+'a,J> Iterator for ReplacerVecIt<'a,T,S,J> where J:Iterator<Item=T>+Clone
972{
973 type Item=T;
974 fn next(&mut self) -> Option<T>
975 {
976 if self.flushing>0
978 {
979 self.flushing-=1;
980 self.source_flush+=1;
983 return Some(self.source[self.source_flush-1].clone());
984 }
985 if let Some(ref mut ins)=self.insert
986 {
987 if let Some(e)=ins.next()
988 {
989 return Some(e);
991 }
992 self.source_flush=self.source_index;
993 }
994 self.insert=None;
995 loop
996 {
997 if self.source_index==self.source.len()
998 {
999 if self.source_flush==self.source.len()
1000 {
1001 return None;
1002 }
1003 self.source_flush+=1;
1004 return Some(self.source[self.source_flush-1].clone());
1005 }
1006 if self.state.index==0
1007 {
1008 while !self.machine.zero_state_may_be_first(self.source[self.source_index].clone())
1010 {
1011 self.source_index+=1;
1012 self.flushing+=1;
1013 if self.source_index==self.source.len()
1014 {
1015 if self.source_flush==self.source.len()
1016 {
1017 return None;
1018 }
1019 self.source_flush+=1;
1020 self.flushing-=1;
1021 return Some(self.source[self.source_flush-1].clone());
1022 }
1023 }
1024 if self.flushing>0
1025 {
1026 self.flushing-=1;
1027 self.source_flush+=1;
1028 return Some(self.source[self.source_flush-1].clone());
1029 }
1030 }
1031 let (news,action)=self.machine.trans(self.state.index,self.source[self.source_index].clone());
1032 self.source_index+=1;
1033 self.state.index=news;
1034 match action
1036 {
1037 Action::Keep(v) =>
1038 {
1039 let nkept=self.state.kept.len();
1041 let added_cand=v.len()>nkept;
1042 let amount=self.state.kept[0];
1043 let mut i=0;
1044 for j in 0..nkept
1045 {
1046 if v[j]
1047 {
1048 self.state.kept[i]=self.state.kept[j]+1;
1049 i+=1;
1050 }
1051 }
1052 self.state.kept.resize(i,0);
1053 let new_amount=self.state.kept[0];
1054 if added_cand
1055 {
1056 self.state.kept.push(if v[v.len()-1] {1} else {0});
1057 }
1058 if new_amount<amount+1
1059 {
1060 self.flushing=amount-new_amount;
1061 self.source_flush+=1;
1064 return Some(self.source[self.source_flush-1].clone());
1065 }
1066 },
1067 Action::Flush =>
1068 {
1069 self.flushing=self.state.kept[0];
1071 self.state.kept.clear();
1072 self.state.kept.push(0);
1073 self.source_flush+=1;
1074 return Some(self.source[self.source_flush-1].clone());
1075 },
1076 Action::Match(cand) =>
1077 {
1078 let mut ins=self.replacement.clone();
1079 self.flushing=self.state.kept[0]-self.state.kept[cand];
1080 self.state.kept.clear();
1081 self.state.kept.push(0);
1082 if self.flushing>0
1083 {
1084 self.insert=Some(ins);
1085 self.source_flush+=1;
1087 return Some(self.source[self.source_flush-1].clone());
1088 }
1089 else
1090 {
1091 if let Some(e)=ins.next()
1093 {
1094 self.insert=Some(ins);
1095 return Some(e);
1096 }
1097 else
1098 {
1099 if self.flushing>0
1100 {
1101 self.insert=Some(ins);
1102 self.flushing-=1;
1103 self.source_flush+=1;
1105 return Some(self.source[self.source_flush-1].clone());
1106 }
1107 else
1108 {
1109 self.source_flush=self.source_index;
1111 }
1112 }
1113 }
1114 },
1115 };
1116 }
1117 }
1118}
1119
1120pub struct FinderIt<
1121 'a,
1122 T:'a,
1123 S: StateMachine<Item=T>+?Sized+'a,
1125 I: 'a+Iterator<Item=T>,
1126 >
1127{
1128 machine: &'a S,
1129 source: I,
1130 state: State,
1131 source_flush: I,
1133}
1134
1135impl<'a,T:'a+Clone,S:StateMachine<Item=T>+'a,I> Iterator for FinderIt<'a,T,S,I> where I:Iterator<Item=T>+Clone
1136{
1137 type Item=Vec<T>;
1138 fn next(&mut self) -> Option<Vec<T>>
1139 {
1140 loop
1142 {
1143 let mut elem;
1144 match self.source.next()
1145 {
1146 None => return None,
1147 Some(e) =>
1148 {
1149 elem=e;
1150 if self.state.index==0
1151 {
1152 while !self.machine.zero_state_may_be_first(elem.clone())
1154 {
1155 match self.source.next()
1156 {
1157 None => return None,
1158 Some(e) => elem=e,
1159 };
1160 self.source_flush.next();
1161 }
1162 }
1163 let (news,action)=self.machine.trans(self.state.index,elem);
1165 self.state.index=news;
1166 match action
1169 {
1170 Action::Keep(v) =>
1171 {
1172 let nkept=self.state.kept.len();
1174 let added_cand=v.len()>nkept;
1175 let amount=self.state.kept[0];
1177 let mut i=0;
1188 for j in 0..nkept
1189 {
1190 if v[j]
1191 {
1192 self.state.kept[i]=self.state.kept[j]+1;
1193 i+=1;
1194 }
1195 }
1196 self.state.kept.resize(i,0);
1197 let new_amount=self.state.kept[0];
1199 if added_cand
1200 {
1201 self.state.kept.push(if v[v.len()-1] {1} else {0});
1203 }
1204 if new_amount<amount+1
1206 {
1207 let mut flushing=amount-new_amount+1;
1211 while flushing>0
1212 {
1213 flushing-=1;
1214 self.source_flush.next();
1215 }
1216 }
1218 },
1219 Action::Flush =>
1220 {
1221 let mut flushing=self.state.kept[0]+1;
1224 self.state.kept.clear();
1226 self.state.kept.push(0);
1227 while flushing>0
1229 {
1230 flushing-=1;
1231 self.source_flush.next();
1232 }
1233 },
1235 Action::Match(cand) =>
1236 {
1237 let amount=self.state.kept[cand]+1;
1239 let mut flushing=self.state.kept[0]-self.state.kept[cand];
1240 while flushing>0
1241 {
1242 flushing-=1;
1243 self.source_flush.next();
1244 }
1245 self.state.kept.clear();
1247 self.state.kept.push(0);
1248 return (0..amount).map(|_|self.source_flush.next()).collect();
1250 },
1251 };
1252 }
1253 };
1254 }
1255 }
1256}
1257
1258pub struct FinderVecIt<
1259 'a,
1260 T:'a,
1261 S: StateMachine<Item=T>+?Sized+'a,
1263 >
1264{
1265 machine: &'a S,
1266 source: &'a Vec<T>,
1267 source_index: usize,
1268 state: State,
1269 source_flush: usize,
1270}
1271
1272impl<'a,T:'a+Clone,S:StateMachine<Item=T>+'a> Iterator for FinderVecIt<'a,T,S>
1273{
1274 type Item=Vec<T>;
1275 fn next(&mut self) -> Option<Vec<T>>
1276 {
1277 loop
1279 {
1280 if self.source_index==self.source.len()
1281 {
1282 return None;
1283 }
1284 if self.state.index==0
1285 {
1286 while !self.machine.zero_state_may_be_first(self.source[self.source_index].clone())
1288 {
1289 self.source_index+=1;
1290 if self.source_index==self.source.len()
1291 {
1292 return None;
1293 }
1294 }
1295 self.source_flush=self.source_index;
1296 }
1297 let (news,action)=self.machine.trans(self.state.index,self.source[self.source_index].clone());
1300 self.source_index+=1;
1301 self.state.index=news;
1302 match action
1304 {
1305 Action::Keep(v) =>
1306 {
1307 let nkept=self.state.kept.len();
1308 let added_cand=v.len()>nkept;
1309 let amount=self.state.kept[0];
1310 let mut i=0;
1311 for j in 0..nkept
1312 {
1313 if v[j]
1314 {
1315 self.state.kept[i]=self.state.kept[j]+1;
1316 i+=1;
1317 }
1318 }
1319 self.state.kept.resize(i,0);
1320 let new_amount=self.state.kept[0];
1321 if added_cand
1322 {
1323 self.state.kept.push(if v[v.len()-1] {1} else {0});
1324 }
1325 if new_amount<amount+1
1326 {
1327 let flushing=amount-new_amount+1;
1329 self.source_flush+=flushing;
1330 }
1331 },
1332 Action::Flush =>
1333 {
1334 let flushing=self.state.kept[0]+1;
1336 self.state.kept.clear();
1337 self.state.kept.push(0);
1338 self.source_flush+=flushing;
1339 },
1340 Action::Match(cand) =>
1341 {
1342 let amount=self.state.kept[cand]+1;
1344 let flushing=self.state.kept[0]-self.state.kept[cand];
1345 self.source_flush+=flushing;
1346 self.state.kept.clear();
1347 self.state.kept.push(0);
1348 let r=self.source[self.source_flush..self.source_flush+amount].to_vec();
1350 self.source_flush+=amount;
1351 return Some(r);
1352 },
1353 };
1354 }
1355 }
1356}
1357
1358
1359
1360pub struct StateMachineHashMap<T:Ord>
1361{
1362 table: HashMap<(usize,T),(usize,Action)>,
1364 expressions: Vec<Vec<RegEx<T>>>,
1366 zero_firsts: BTreeSet<T>,
1367}
1368
1369impl<T:Ord+Hash> StateMachine for StateMachineHashMap<T>
1370{
1371 type Item=T;
1372 fn trans(&self, state:usize, elem:T) -> (usize,Action)
1373 {
1374 match self.table.get(&(state,elem))
1376 {
1377 Some(&(s,ref a)) => (s,a.clone()),
1378 None => (0,Action::Flush),
1379 }
1380 }
1381 fn zero_state_may_be_first(&self, elem:Self::Item) -> bool
1382 {
1383 if let Some(_)=self.zero_firsts.get(&elem)
1384 {
1385 return true;
1386 }
1387 else
1388 {
1389 return false;
1390 }
1391 }
1392}
1393
1394impl<T:Ord+Hash+Clone> RegEx<T>
1395{
1396 pub fn compile_hash_map(&self,all:&Vec<T>) -> StateMachineHashMap<T>
1397 {
1398 let snf=self.normal_form();
1399 let mut machine=StateMachineHashMap{
1400 table: HashMap::new(),
1401 expressions: vec![vec![snf.clone()]],
1403 zero_firsts: snf.firsts(all),
1404 };
1405 let mut index=0;
1406 while index<machine.expressions.len()
1408 {
1409 let exps=machine.expressions[index].clone();
1411 let mut first: BTreeSet<T> = BTreeSet::new();
1414 for e in exps.iter()
1415 {
1416 let mut f=e.firsts(all);
1418 first.append(&mut f);
1419 }
1420 for lit in first
1422 {
1423 let derivations: Vec<RegEx<T>>=exps.iter().map(|e|e.derivative(&lit)).collect();
1424 let mut mask:Vec<bool>=derivations.iter().map(|e|*e!=EmptySet).collect();
1425 for i in 1..mask.len()
1426 {
1427 if mask[i] && derivations[0..i].contains(&derivations[i])
1428 {
1429 mask[i]=false;
1430 }
1431 }
1432 let mut new_exps:Vec<RegEx<T>>=derivations.iter().enumerate().filter_map(|(i,e)|if mask[i]{Some(e.clone())}else{None}).collect();
1433 if !new_exps.contains(&snf)
1434 {
1435 new_exps.push(snf.clone());
1436 mask.push(false);
1437 }
1438 let mut action=Action::Keep(Rc::new(mask));
1442 for (i,e) in derivations.iter().enumerate()
1443 {
1444 if e.contains_empty()
1446 {
1447 action=Action::Match(i);
1448 break;
1449 }
1450 }
1451 if let Action::Match(_)=action
1453 {
1454 machine.table.insert((index,lit.clone()),(0,action.clone()));
1455 }
1456 else
1457 {
1458 let mut found=false;
1459 for (i,other_exps) in machine.expressions.iter().enumerate()
1460 {
1461 if *other_exps==new_exps
1462 {
1463 machine.table.insert((index,lit.clone()),(i,action.clone()));
1464 found=true;
1465 break;
1466 }
1467 }
1468 if !found
1469 {
1470 machine.table.insert((index,lit),(machine.expressions.len(),action));
1472 machine.expressions.push(new_exps);
1473 }
1474 }
1475 }
1476 index+=1;
1477 }
1478 machine
1479 }
1480}
1481
1482pub struct StateMachineVec
1483{
1484 table: Vec<Vec<(usize,Action)>>,
1485 zero_firsts: Vec<bool>,
1486}
1487
1488impl StateMachine for StateMachineVec
1489{
1490 type Item=u8;
1491 fn trans(&self, state:usize, elem:u8) -> (usize,Action)
1492 {
1493 self.table[state][elem as usize].clone()
1494 }
1495 fn zero_state_may_be_first(&self, elem:Self::Item) -> bool
1496 {
1497 self.zero_firsts[elem as usize]
1498 }
1499}
1500
1501impl StateMachineHashMap<u8>
1502{
1503 pub fn to_state_machine_vec(&self) -> StateMachineVec
1504 {
1505 let mut machine=StateMachineVec{
1506 table: vec![],
1507 zero_firsts: vec![false;256],
1508 };
1509 let n=self.expressions.len();
1510 machine.table.resize(n,vec![(0,Action::Flush);256]);
1511 for (&(state,lit),value) in self.table.iter()
1512 {
1513 machine.table[state][lit as usize]=value.clone();
1514 }
1515 for elem in self.zero_firsts.iter()
1516 {
1517 machine.zero_firsts[elem.clone() as usize]=true;
1518 }
1519 machine
1520 }
1521}
1522
1523
1524#[cfg(test)]
1529mod tests
1530{
1531 use {StateMachine,StateMachineFunction,Action,regex};
1532 use std::rc::Rc;
1533 #[test]
1534 fn it_works()
1535 {
1536 assert_eq!(2 + 2, 4);
1537 }
1538 #[test]
1539 fn raw_machine_function()
1540 {
1541 let kept=Rc::new(vec![true,false]);
1542 let transition = |_s,c|
1543 {
1544 if c=='a'
1547 {
1548 (0,Action::Match(0))
1549 }
1550 else if c=='o'
1551 {
1552 (0,Action::Flush)
1553 }
1554 else
1555 {
1556 (0,Action::Keep(kept.clone()))
1558 }
1559 };
1560 let machine=StateMachineFunction::<char>{
1561 transition: &transition,
1562 };
1563 let message="hola mundo";
1564 let rep="HELLO";
1565 let result:String=machine.replace_all(message.chars(),rep.chars()).collect();
1566 println!("result=\"{}\"",result);}
1568 #[test]
1569 fn create_regex1()
1570 {
1571 let reg_str="(aND|can|[bc]*|AAA)de";
1572 let reg=regex(®_str);
1573 let result=reg.to_string(&|c:&char|c.to_string());
1574 println!("reg={}",result);
1575 assert_eq!(result,"(((A)(A)(A)|(a)(N)(D)|(c)(a)(n)|((b|c))*))(d)(e)");
1576 }
1577 #[test]
1578 fn derivative1()
1579 {
1580 let reg_str="(ab)*(c(ab)*d|e)";
1581 let reg=regex(®_str);
1582 println!("da={}",reg.derivative(&'a').to_string(&|c:&char|c.to_string()));
1583 assert_eq!(reg.derivative(&'a').to_string(&|c:&char|c.to_string()),"((b)(((a)(b))*))((e|(c)(((a)(b))*)(d)))");
1584 println!("db={}",reg.derivative(&'b').to_string(&|c:&char|c.to_string()));
1585 assert_eq!(reg.derivative(&'b').to_string(&|c:&char|c.to_string()),"@emptyset");
1586 println!("dc={}",reg.derivative(&'c').to_string(&|c:&char|c.to_string()));
1587 assert_eq!(reg.derivative(&'c').to_string(&|c:&char|c.to_string()),"(((a)(b))*)(d)");
1588 println!("de={}",reg.derivative(&'e').to_string(&|c:&char|c.to_string()));
1589 assert_eq!(reg.derivative(&'e').to_string(&|c:&char|c.to_string()),"");
1590 println!("dada={}",reg.derivative(&'a').derivative(&'a').to_string(&|c:&char|c.to_string()));
1591 assert_eq!(reg.derivative(&'a').derivative(&'a').to_string(&|c:&char|c.to_string()),"@emptyset");
1592 println!("dadb={}",reg.derivative(&'a').derivative(&'b').to_string(&|c:&char|c.to_string()));
1593 assert_eq!(reg.derivative(&'a').derivative(&'b').to_string(&|c:&char|c.to_string()),"(((a)(b))*)((e|(c)(((a)(b))*)(d)))");
1594 println!("dcda={}",reg.derivative(&'c').derivative(&'a').to_string(&|c:&char|c.to_string()));
1595 assert_eq!(reg.derivative(&'c').derivative(&'a').to_string(&|c:&char|c.to_string()),"((b)(((a)(b))*))(d)");
1596 }
1597 #[test]
1598 fn create_regex_rewrite1()
1599 {
1600 let reg_str="(ab)*(c(ab)*d|e)";
1601 let reg=regex(®_str);
1602 let message="hola_abcabcababd_mundo";
1603 let rep="HERE";
1604 let all=vec![];let machine=reg.compile_hash_map(&all);
1606 println!("::create_regex_rewrite1 machine compiled");
1607 let result:String=machine.replace_all(message.chars(),rep.chars()).collect();
1608 println!("result={}",result);
1610 assert_eq!(result,"hola_abcHERE_mundo");
1611 }
1613 #[test]
1614 fn clean_headers_and_newlines()
1615 {
1616 let reg=regex(">[^\n]*\n|\n");
1617 let texto=">first line
1618second line
1619third line
1620>fourth line
1621fifth line";
1622 let all=vec!['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','>','\n',' '];
1623 let machine=reg.compile_hash_map(&all);
1624 println!("::clean_headers_and_newlines machine compiled");
1625 let rep="";
1626 let result:String=machine.replace_all(texto.chars(),rep.chars()).collect();
1627 println!("result={}",result);
1628 assert_eq!(result,"second linethird linefifth line");
1629 }
1630 #[test]
1631 fn test_escaping()
1632 {
1633 let reg=regex("\\|[^|][^|]*\\|");
1634 let texto="|abc||pqrs|oooo||ooo|trs|";
1635 let all=vec!['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','>','\n',' '];
1636 let rep="-";
1637 let machine=reg.compile_hash_map(&all);
1638 let result:String=machine.replace_all(texto.chars(),rep.chars()).collect();
1639 println!("result={}",result);
1640 assert_eq!(result,"--oooo|-trs|");
1641 }
1642 #[test]
1643 fn test_u8()
1644 {
1645 let reg=regex("la").map(&|&c|c as u8);
1646 let rep=vec![];
1647 let texto="Hola Mundo";
1648 let seq:Vec<u8>=texto.chars().map(|c|c as u8).collect();
1649 let mut all:Vec<u8>=vec![];
1650 for c in seq.iter()
1651 {
1652 if !all.contains(c)
1653 {
1654 all.push(*c);
1655 }
1656 }
1657 let machine=reg.compile_hash_map(&all).to_state_machine_vec();
1658 let result:Vec<u8>=machine.replace_all(seq.iter().map(&|&x|x),rep.iter().map(&|&x|x)).collect();
1661 let result_char:String=result.iter().map(|c|*c as char).collect();
1662 println!("result={:?}",result_char);
1663 assert_eq!(result_char,"Ho Mundo");
1664 }
1665 #[test]
1666 fn test_find()
1667 {
1668 let reg=regex("hol*a");
1669 let texto="holla mundo cosa hola hollla bla holla";
1670 let all=vec!['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','>','\n',' '];
1671 let machine=reg.compile_hash_map(&all);
1672 let result:Vec<Vec<char>>=machine.find_all(texto.chars()).collect();
1673 let result:String=result.iter().map::<String,_>(|v|v.iter().collect::<String>()).collect::<Vec<String>>().join("|");
1674 println!("::test_find result={:?}",result);
1675 assert_eq!(result,"holla|hola|hollla|holla");
1676 }
1677}