1#[cfg(feature = "no_std")]
4use alloc::collections::BTreeSet as HashSet;
5
6#[cfg(not(feature = "no_std"))]
7use std::collections::HashSet;
8
9use alloc::{borrow::Cow, boxed::Box, rc::Rc, vec, vec::Vec};
10use core::{cell::RefCell, fmt};
11
12use crate::{
13 compile::{Range, Token},
14 ctype,
15 immut_vec::ImmutVec,
16 tree::{Node as TreeNode, *},
17};
18
19#[derive(Clone)]
21pub struct PosixRegex<'a> {
22 tree: Cow<'a, Tree>,
23 case_insensitive: bool,
24 newline: bool,
25 no_start: bool,
26 no_end: bool,
27}
28impl<'a> PosixRegex<'a> {
29 pub fn new(tree: Cow<'a, Tree>) -> Self {
33 Self {
34 tree,
35 case_insensitive: false,
36 newline: false,
37 no_start: false,
38 no_end: false,
39 }
40 }
41 pub fn case_insensitive(mut self, value: bool) -> Self {
45 self.case_insensitive = value;
46 self
47 }
48 pub fn newline(mut self, value: bool) -> Self {
52 self.newline = value;
53 self
54 }
55 pub fn no_start(mut self, value: bool) -> Self {
58 self.no_start = value;
59 self
60 }
61 pub fn no_end(mut self, value: bool) -> Self {
64 self.no_end = value;
65 self
66 }
67 pub fn count_groups(&self) -> usize {
70 let mut count = 1;
71 let mut cursor = self.tree[self.tree.root].child;
72 while let Some(node) = cursor {
73 let node = &self.tree[node];
75 if node.child.is_some() {
76 cursor = node.child;
77 } else {
78 let mut node = Some(node);
79 while node
80 .map(|node| node.next_sibling.is_none())
81 .unwrap_or(false)
82 {
83 node = node.unwrap().parent.map(|node| &self.tree[node]);
84 }
85 cursor = node.and_then(|node| node.next_sibling);
86 }
87
88 if let Token::Group(_) = node.token {
90 count += 1;
91 }
92 }
93 count
94 }
95 #[allow(clippy::type_complexity)]
98 pub fn matches_exact(&self, input: &[u8]) -> Option<Box<[Option<(usize, usize)>]>> {
99 let mut matcher = PosixRegexMatcher {
100 base: self,
101 input,
102 offset: 0,
103 max_groups: self.count_groups(),
104 };
105 let internal_prev = RefCell::new(Vec::new());
106 let prev = ImmutVec::new(&internal_prev);
107 let tree = self.tree[self.tree.root]
108 .children(&self.tree)
109 .filter_map(|node| {
110 self.tree[node]
111 .child
112 .map(|child| Node::new(&self.tree, child, prev))
113 })
114 .collect();
115
116 let start = matcher.offset;
117 match matcher.matches_exact(tree) {
118 None => None,
119 Some(mut groups) => {
120 assert_eq!(groups[0], None);
121 groups[0] = Some((start, matcher.offset));
122 Some(groups)
123 }
124 }
125 }
126 #[allow(clippy::type_complexity)]
128 pub fn matches(
129 &self,
130 input: &[u8],
131 mut max: Option<usize>,
132 ) -> Vec<Box<[Option<(usize, usize)>]>> {
133 let mut matcher = PosixRegexMatcher {
134 base: self,
135 input,
136 offset: 0,
137 max_groups: self.count_groups(),
138 };
139
140 let mut arena = self.tree.arena.to_vec();
141
142 let root = self.tree[self.tree.root].child;
143
144 let group_id = NodeId::from(arena.len());
146 arena.push(TreeNode {
147 token: Token::Group(0),
148 range: Range(1, Some(1)),
149 parent: None,
150 next_sibling: None,
151 child: root,
152 });
153
154 let mut cursor = root;
156 while let Some(node) = cursor {
157 let node = &mut arena[usize::from(node)];
158 cursor = node.next_sibling;
159 node.parent = Some(group_id);
160 }
161
162 let start_id = NodeId::from(arena.len());
164 arena.push(TreeNode {
165 token: Token::InternalStart,
166 range: Range(0, None),
167 parent: None,
168 next_sibling: Some(group_id),
169 child: None,
170 });
171
172 let tree = Tree {
173 arena: arena.into_boxed_slice(),
174 root: start_id,
175 };
176 let internal_prev = RefCell::new(Vec::new());
177 let prev = ImmutVec::new(&internal_prev);
178 let tree = vec![Node::new(&tree, tree.root, prev)];
179
180 let mut matches = Vec::new();
181 while max.map(|max| max > 0).unwrap_or(true) && matcher.offset <= matcher.input.len() {
182 match matcher.matches_exact(tree.clone()) {
183 Some(groups) => {
184 if groups[0].unwrap().0 == groups[0].unwrap().1 {
185 matcher.offset += 1;
186 }
187 matches.push(groups)
188 }
189 None => break,
190 }
191 max = max.map(|max| max - 1);
192 }
193 matches
194 }
195}
196
197#[derive(Clone, Copy, Debug)]
198struct GroupEvent {
199 open: bool,
200 id: usize,
201 offset: usize,
202}
203#[derive(Clone, Copy)]
204struct BackRef {
205 offset: usize,
206 index: usize,
207 len: usize,
208}
209
210#[derive(Clone)]
211struct Node<'a> {
212 tree: &'a Tree,
213 parent: Option<Rc<Node<'a>>>,
214 node: NodeId,
215 prev: ImmutVec<'a, GroupEvent>,
216 repeated: u32,
217 backref: Option<BackRef>,
218}
219impl fmt::Debug for Node<'_> {
220 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
221 let mut range = self.node().range;
222 range.0 = range.0.saturating_sub(self.repeated);
223 range.1 = range.1.map(|max| max.saturating_sub(self.repeated));
224 write!(f, "{:?}", (&self.node().token, range))
225 }
226}
227impl<'a> Node<'a> {
228 fn prepare(mut me: Self) -> Self {
230 me.repeated = 0;
231 me.backref = None;
232 if let Token::BackRef(id) = me.node().token {
233 let mut start = None;
234 let mut end = None;
235 for event in me.prev.iter_rev() {
236 if event.id != id as usize {
237 continue;
238 }
239 if event.open {
240 start = Some(event.offset);
241 break;
242 } else {
243 end = end.or(Some(event.offset));
244 }
245 }
246 if let (Some(start), Some(end)) = (start, end) {
247 me.backref = Some(BackRef {
248 offset: start,
249 index: 0,
250 len: end - start,
251 });
252 if start == end {
253 let Range(min, _) = me.node().range;
255 me.repeated += min;
256 }
257 }
258 }
259 me
260 }
261 fn new(tree: &'a Tree, node: NodeId, prev: ImmutVec<'a, GroupEvent>) -> Self {
263 Self::prepare(Self {
264 tree,
265 parent: None,
266 node,
267 prev,
268 repeated: 0,
269 backref: None,
270 })
271 }
272 fn into_children(mut self, branches: &mut Vec<Node<'a>>, offset: usize) {
274 let id = match self.tree[self.node].token {
275 Token::Group(id) => id,
276 _ => return,
277 };
278 self.repeated += 1;
279 let mut parent = Rc::new(self);
280 let mut empty = true;
281 for alternative in parent.tree[parent.node].children(parent.tree) {
282 if let Some(node) = parent.tree[alternative].child {
283 empty = false;
284 branches.push(Self::prepare(Self {
285 tree: parent.tree,
286 parent: Some(Rc::clone(&parent)),
287 node,
288 prev: parent.prev.push(GroupEvent {
289 open: true,
290 id,
291 offset,
292 }),
293 repeated: 0,
294 backref: None,
295 }));
296 }
297 }
298 if empty {
299 let parent = Rc::get_mut(&mut parent)
300 .expect("group empty but still there's a dangling reference");
301 for &open in &[true, false] {
302 parent.prev = parent.prev.push(GroupEvent { open, id, offset });
303 }
304 parent.add_branches(branches, offset);
305 }
306 }
307 fn node(&self) -> &TreeNode {
309 &self.tree[self.node]
310 }
311 fn get_capturing_groups(
313 &self,
314 max_count: usize,
315 offset: usize,
316 ) -> Box<[Option<(usize, usize)>]> {
317 let mut prev = self.prev;
318
319 let mut parent = self.node().parent;
321 while let Some(group) = parent {
322 let group = &self.tree[group];
323 parent = group.parent;
324 if let Token::Group(id) = group.token {
325 prev = prev.push(GroupEvent {
326 open: false,
327 id,
328 offset,
329 })
330 }
331 }
332
333 let mut groups: Vec<(Option<usize>, Option<usize>)> = vec![(None, None); max_count];
335 for event in prev.iter_rev() {
336 let group = &mut groups[event.id];
337 if event.open {
338 group.0 = group.0.or(Some(event.offset));
339 } else {
340 group.1 = group.1.or(Some(event.offset));
341 }
342 }
343 groups
344 .into_iter()
345 .map(|(start, end)| Some((start?, end?)))
346 .collect::<Vec<_>>()
347 .into_boxed_slice()
348 }
349 fn increment(&mut self) {
351 if let Some(ref mut backref) = self.backref {
352 backref.index += 1;
353 if backref.index >= backref.len {
354 backref.index = 0;
355 self.repeated += 1;
356 }
357 } else {
358 self.repeated += 1;
359 }
360 }
361 fn add_branches(&self, branches: &mut Vec<Node<'a>>, offset: usize) {
364 let Range(min, _) = self.node().range;
365 if self
366 .backref
367 .map(|backref| backref.index > 0 || self.repeated < min)
368 .unwrap_or(false)
369 {
370 } else if let Some(next) = self.node().next_sibling {
372 branches.push(Self::prepare(Self {
373 node: next,
374 ..self.clone()
375 }));
376 } else {
377 let parent = match self.parent {
378 Some(ref parent) => parent,
379 None => return,
380 };
381 let Range(min, _) = parent.node().range;
382
383 let mut ids = Vec::new();
385 {
386 let mut parent = Some(parent);
387 while let Some(node) = parent {
388 if let Token::Group(id) = node.node().token {
389 ids.push(id);
390 }
391 parent = node.parent.as_ref();
392 }
393 }
394
395 if parent.repeated >= min {
396 let mut parent = Some(parent);
398 while parent
399 .map(|parent| parent.node().next_sibling.is_none())
400 .unwrap_or(false)
401 {
402 parent = parent.unwrap().parent.as_ref();
403 }
404 if let Some((node, next)) =
405 parent.and_then(|parent| parent.node().next_sibling.map(|node| (parent, node)))
406 {
407 let clone = (**node).clone();
408 let mut prev = self.prev;
409 for &id in &ids {
410 prev = prev.push(GroupEvent {
411 open: false,
412 id,
413 offset,
414 });
415 }
416 branches.push(Self::prepare(Self {
417 node: next,
418 prev,
419 ..clone
420 }));
421 }
422 }
423
424 let mut parent = Some(parent);
426 while let Some(node) = parent {
427 parent = node.parent.as_ref();
428 let Range(_, max) = node.node().range;
429 if max.map(|max| node.repeated < max).unwrap_or(true) {
430 let mut clone = (**node).clone();
431 let mut prev = self.prev;
432 for &id in &ids {
433 prev = prev.push(GroupEvent {
434 open: false,
435 id,
436 offset,
437 });
438 }
439 clone.prev = prev;
440 clone.into_children(branches, offset);
441 }
442 }
443 }
444 }
445 fn is_final(&self) -> bool {
447 let Range(min, _) = self.node().range;
448 if self.repeated < min {
449 return false;
450 }
451
452 let mut next = Some(self);
453 while let Some(current) = next {
454 let mut node = current.node();
455 if node.token == Token::Alternative {
456 next = current.parent.as_deref();
458 node = &self.tree[node.parent.expect("found root alternative")];
459 }
460 if let Token::Group(_) = node.token {
461 let Range(min, _) = node.range;
462 if current.repeated < min {
463 return false;
464 }
465 }
466 if node.next_sibling.is_some() {
467 break;
468 }
469 next = current.parent.as_deref();
470 }
471 next.and_then(|node| self.tree[node.node].next_sibling)
472 .is_none()
473 }
474}
475
476struct PosixRegexMatcher<'a> {
477 base: &'a PosixRegex<'a>,
478 input: &'a [u8],
479 offset: usize,
480 max_groups: usize,
481}
482impl PosixRegexMatcher<'_> {
483 fn expand<'b>(
484 &mut self,
485 skip: &mut HashSet<NodeId>,
486 branches: &mut [Node<'b>],
487 ) -> Vec<Node<'b>> {
488 let mut insert = Vec::new();
489
490 for branch in &mut *branches {
491 if skip.contains(&branch.node) {
492 continue;
493 }
494
495 let node = branch.node();
496
497 if let Token::Group(_) = node.token {
498 branch.clone().into_children(&mut insert, self.offset);
499 }
500
501 let Range(min, _) = node.range;
502 if branch.repeated >= min {
503 branch.add_branches(&mut insert, self.offset);
505 }
506 }
507
508 if !insert.is_empty() {
509 for branch in &mut *branches {
510 skip.insert(branch.node);
511 }
512 let mut new = self.expand(skip, &mut insert);
513 insert.append(&mut new);
514 }
515 insert
516 }
517
518 #[allow(clippy::type_complexity)]
519 fn matches_exact(&mut self, mut branches: Vec<Node>) -> Option<Box<[Option<(usize, usize)>]>> {
520 let mut succeeded = None;
523 let mut prev = self
524 .offset
525 .checked_sub(1)
526 .and_then(|index| self.input.get(index).cloned());
527
528 let mut set = HashSet::new();
529
530 loop {
531 let next = self.input.get(self.offset).cloned();
532
533 set.clear();
534 let mut insert = self.expand(&mut set, &mut branches);
535 branches.append(&mut insert);
536
537 loop {
539 let mut index = 0;
540 let mut remove = 0;
541
542 while index < branches.len() {
543 if remove > 0 {
544 branches.swap(index, index - remove);
545 }
546 let branch = &mut branches[index - remove];
547 index += 1;
548
549 let node = branch.node();
550
551 match node.token {
552 Token::End | Token::Start | Token::WordEnd | Token::WordStart => {
553 let accepts = match node.token {
554 Token::End => {
555 (!self.base.no_end && next.is_none())
556 || (self.base.newline && next == Some(b'\n'))
557 }
558 Token::Start => {
559 (!self.base.no_start && self.offset == 0)
560 || (self.base.newline && prev == Some(b'\n'))
561 }
562 Token::WordEnd => next.map(ctype::is_word_boundary).unwrap_or(true),
563 Token::WordStart => {
564 prev.map(ctype::is_word_boundary).unwrap_or(true)
565 }
566 _ => unreachable!(),
567 };
568 if accepts {
569 branch.increment();
570 branch.add_branches(&mut insert, self.offset);
571 }
572 if branch.is_final() {
573 succeeded =
574 Some(branch.get_capturing_groups(self.max_groups, self.offset));
575 }
576 remove += 1;
577 }
578 _ => (),
579 }
580 }
581 branches.truncate(branches.len() - remove);
582
583 if insert.is_empty() {
584 break;
585 }
586 set.clear();
587 let mut insert2 = self.expand(&mut set, &mut insert);
588 branches.append(&mut insert);
589 branches.append(&mut insert2);
590 }
591
592 let mut index = 0;
593 let mut remove = 0;
594
595 while index < branches.len() {
597 if remove > 0 {
598 branches.swap(index, index - remove);
601 }
602 let branch = &mut branches[index - remove];
603 index += 1;
604
605 let node = branch.node();
606 let Range(_, max) = node.range;
607
608 let accepts = max.map(|max| branch.repeated < max).unwrap_or(true)
610 && match node.token {
611 Token::InternalStart => next.is_some(),
612 Token::Group { .. } => false, Token::Any => next
615 .map(|c| !self.base.newline || c != b'\n')
616 .unwrap_or(false),
617 Token::BackRef(_) => {
618 if let Some(ref backref) = branch.backref {
619 next == Some(self.input[backref.offset + backref.index])
620 } else {
621 false
622 }
623 }
624 Token::Char(c) => {
625 if self.base.case_insensitive {
626 next.map(|c2| c & !32 == c2 & !32).unwrap_or(false)
627 } else {
628 next == Some(c)
629 }
630 }
631 Token::OneOf { invert, ref list } => {
632 if let Some(next) = next {
633 (!invert || !self.base.newline || next != b'\n')
634 && list
635 .iter()
636 .any(|c| c.matches(next, self.base.case_insensitive))
637 != invert
638 } else {
639 false
640 }
641 }
642
643 Token::Alternative
644 | Token::End
645 | Token::Root
646 | Token::Start
647 | Token::WordEnd
648 | Token::WordStart => unreachable!(),
649 };
650
651 if accepts {
652 branch.increment();
653 } else {
654 if branch.is_final() {
655 let groups = branch.get_capturing_groups(self.max_groups, self.offset);
656
657 let mut add = true;
658 if let Some((new_start, new_end)) = groups[0] {
659 if let Some(previous) = succeeded.as_ref() {
660 if let Some((prev_start, prev_end)) = previous[0] {
661 if new_end - new_start <= prev_end - prev_start {
662 add = false;
663 }
664 }
665 }
666 }
667 if add {
668 succeeded = Some(groups);
669 }
670 }
671 remove += 1;
672 }
673 }
674 let end = branches.len() - remove;
675 branches.truncate(end);
676
677 if branches.is_empty() ||
678 (succeeded.is_some() && branches.iter().all(|t| t.node().token == Token::InternalStart))
680 {
681 return succeeded;
682 }
683
684 if next.is_some() {
685 self.offset += 1;
686 prev = next;
687 }
688 }
689 }
690}
691
692#[cfg(test)]
693mod tests {
694 #[cfg(feature = "bench")]
695 extern crate test;
696
697 #[cfg(feature = "bench")]
698 use self::test::Bencher;
699
700 use super::*;
701 use crate::PosixRegexBuilder;
702
703 fn boxed_slice<T>(slice: Box<[T]>) -> Box<[T]> {
706 slice
707 }
708
709 macro_rules! abox {
710 ($($item:expr),*) => {
711 boxed_slice(Box::new([$($item),*]))
712 }
713 }
714
715 fn compile(regex: &str) -> PosixRegex {
716 PosixRegexBuilder::new(regex.as_bytes())
717 .with_default_classes()
718 .compile()
719 .expect("error compiling regex")
720 }
721 fn matches(regex: &str, input: &str) -> Vec<Box<[Option<(usize, usize)>]>> {
722 compile(regex).matches(input.as_bytes(), None)
723 }
724 fn matches_exact(regex: &str, input: &str) -> Option<Box<[Option<(usize, usize)>]>> {
725 compile(regex).matches_exact(input.as_bytes())
726 }
727
728 #[test]
729 fn basic() {
730 assert!(matches_exact("abc", "abc").is_some());
731 assert!(matches_exact("abc", "bbc").is_none());
732 assert!(matches_exact("abc", "acc").is_none());
733 assert!(matches_exact("abc", "abd").is_none());
734 }
735 #[test]
736 fn repetitions() {
737 assert!(matches_exact("abc*", "ab").is_some());
738 assert!(matches_exact("abc*", "abc").is_some());
739 assert!(matches_exact("abc*", "abccc").is_some());
740
741 assert!(matches_exact(r"a\{1,2\}b", "b").is_none());
742 assert!(matches_exact(r"a\{1,2\}b", "ab").is_some());
743 assert!(matches_exact(r"a\{1,2\}b", "aab").is_some());
744 assert!(matches_exact(r"a\{1,2\}b", "aaab").is_none());
745
746 assert!(matches_exact(r"[abc]\{3\}", "abcTRAILING").is_some());
747 assert!(matches_exact(r"[abc]\{3\}", "abTRAILING").is_none());
748 }
749 #[test]
750 fn any() {
751 assert!(matches_exact(".*", "").is_some());
752 assert!(matches_exact(".*b", "b").is_some());
753 assert!(matches_exact(".*b", "ab").is_some());
754 assert!(matches_exact(".*b", "aaaaab").is_some());
755 assert!(matches_exact(".*b", "HELLO WORLD").is_none());
756 assert!(matches_exact(".*b", "HELLO WORLDb").is_some());
757 assert!(matches_exact("H.*O WORLD", "HELLO WORLD").is_some());
758 assert!(matches_exact("H.*ORLD", "HELLO WORLD").is_some());
759 }
760 #[test]
761 fn brackets() {
762 assert!(matches_exact("[abc]*d", "abcd").is_some());
763 assert!(matches_exact("[0-9]*d", "1234d").is_some());
764 assert!(matches_exact("[[:digit:]]*d", "1234d").is_some());
765 assert!(matches_exact("[[:digit:]]*d", "abcd").is_none());
766 }
767 #[test]
768 fn alternations() {
769 assert!(matches_exact(r"abc\|bcd", "abc").is_some());
770 assert!(matches_exact(r"abc\|bcd", "bcd").is_some());
771 assert!(matches_exact(r"abc\|bcd", "cde").is_none());
772 assert!(matches_exact(r"[A-Z]\+\|yee", "").is_none());
773 assert!(matches_exact(r"[A-Z]\+\|yee", "HELLO").is_some());
774 assert!(matches_exact(r"[A-Z]\+\|yee", "yee").is_some());
775 assert!(matches_exact(r"[A-Z]\+\|yee", "hello").is_none());
776 }
777 #[test]
778 fn offsets() {
779 assert_eq!(matches_exact("abc", "abcd"), Some(abox![Some((0, 3))]));
780 assert_eq!(
781 matches_exact(r"[[:alpha:]]\+", "abcde12345"),
782 Some(abox![Some((0, 5))])
783 );
784 assert_eq!(
785 matches_exact(r"a\(bc\)\+d", "abcbcd"),
786 Some(abox![Some((0, 6)), Some((3, 5))])
787 );
788 assert_eq!(
789 matches_exact(r"hello\( \(world\|universe\) :D\)\?!", "hello world :D!"),
790 Some(abox![Some((0, 15)), Some((5, 14)), Some((6, 11))])
791 );
792 assert_eq!(
793 matches_exact(r"hello\( \(world\|universe\) :D\)\?", "hello world :D"),
794 Some(abox![Some((0, 14)), Some((5, 14)), Some((6, 11))])
795 );
796 assert_eq!(
797 matches_exact(r"\(\<hello\>\) world", "hello world"),
798 Some(abox![Some((0, 11)), Some((0, 5))])
799 );
800 assert_eq!(
801 matches_exact(r".*d", "hid howd ared youd"),
802 Some(abox![Some((0, 18))])
803 );
804 assert_eq!(
805 matches_exact(r".*\(a\)", "bbbbba"),
806 Some(abox![Some((0, 6)), Some((5, 6))])
807 );
808 assert_eq!(
809 matches_exact(r"\(a \(b\) \(c\)\) \(d\)", "a b c d"),
810 Some(abox![
811 Some((0, 7)),
812 Some((0, 5)),
813 Some((2, 3)),
814 Some((4, 5)),
815 Some((6, 7))
816 ])
817 );
818 assert_eq!(
819 matches_exact(r"\(.\)*", "hello"),
820 Some(abox![Some((0, 5)), Some((4, 5))])
821 );
822 assert_eq!(
823 matches(r"h\(i\)", "hello hi lol"),
824 vec![abox![Some((6, 8)), Some((7, 8))]]
825 );
826 assert_eq!(
827 matches_exact(r"\(\([[:alpha:]]\)*\)", "abcdefg"),
828 Some(abox![Some((0, 7)), Some((0, 7)), Some((6, 7))])
829 );
830 assert_eq!(
831 matches_exact(r"\(\.\([[:alpha:]]\)\)*", ".a.b.c.d.e.f.g"),
832 Some(abox![Some((0, 14)), Some((12, 14)), Some((13, 14))])
833 );
834 assert_eq!(
835 matches_exact(r"\(a\|\(b\)\)*\(c\)", "bababac"),
836 Some(abox![
837 Some((0, 7)),
838 Some((5, 6)),
839 Some((4, 5)),
840 Some((6, 7))
841 ])
842 );
843 assert_eq!(
844 matches_exact(r"\(a\|\(b\)\)*\(c\)", "aaac"),
845 Some(abox![Some((0, 4)), Some((2, 3)), None, Some((3, 4))])
846 );
847 assert_eq!(
848 matches_exact(r"a\(\)bc", "abc"),
849 Some(abox![Some((0, 3)), Some((1, 1))])
850 );
851 }
852 #[test]
853 fn matches_is_lazy() {
854 assert_eq!(
855 matches(r"\(hi\)\+", "hello hihi kek"),
856 vec![abox![Some((6, 10)), Some((8, 10))]]
857 );
858 assert_eq!(
859 matches(r"o\+", "helloooooooo woooorld, hooow are you?"),
860 vec![
861 abox![Some((4, 12))],
862 abox![Some((14, 18))],
863 abox![Some((24, 27))],
864 abox![Some((34, 35))]
865 ]
866 );
867 assert_eq!(
868 matches(r"z*", "abc"),
869 vec![
870 abox![Some((0, 0))],
871 abox![Some((1, 1))],
872 abox![Some((2, 2))],
873 abox![Some((3, 3))]
874 ]
875 );
876 }
877 #[test]
878 fn start_and_end() {
879 assert!(matches_exact("^abc$", "abc").is_some());
880 assert!(matches_exact("^bcd", "bcde").is_some());
881 assert!(matches_exact("^bcd", "abcd").is_none());
882 assert!(matches_exact("abc$", "abc").is_some());
883 assert!(matches_exact("abc$", "abcd").is_none());
884
885 assert!(matches_exact(r".*\(^\|a\)c", "c").is_some());
886 assert!(matches_exact(r".*\(^\|a\)c", "ac").is_some());
887 assert!(matches_exact(r".*\(^\|a\)c", "bc").is_none());
888
889 assert!(matches_exact(".*^^a", "helloabc").is_none());
891 assert!(matches_exact(".*^^a", "abc").is_some());
892 }
893 #[test]
894 fn word_boundaries() {
895 assert!(matches_exact(r"hello\>.world", "hello world").is_some());
896 assert!(matches_exact(r"hello\>.world", "hello!world").is_some());
897 assert!(matches_exact(r"hello\>.world", "hellooworld").is_none());
898
899 assert!(matches_exact(r"hello.\<world", "hello world").is_some());
900 assert!(matches_exact(r"hello.\<world", "hello!world").is_some());
901 assert!(matches_exact(r"hello.\<world", "hellooworld").is_none());
902
903 assert!(matches_exact(r".*\<hello\>", "hihello").is_none());
904 assert!(matches_exact(r".*\<hello\>", "hi_hello").is_none());
905 assert!(matches_exact(r".*\<hello\>", "hi hello").is_some());
906 }
907 #[test]
908 fn groups() {
909 assert!(matches_exact(r"\(a*\)*", "aaaaa").is_some());
910 assert!(matches_exact(r"\(hello\) world", "hello world").is_some());
911 assert!(matches_exact(r"\(a*\|b\|c\)d", "d").is_some());
912 assert!(matches_exact(r"\(a*\|b\|c\)d", "aaaad").is_some());
913 assert!(matches_exact(r"\(a*\|b\|c\)d", "bd").is_some());
914 assert!(matches_exact(r"\(a*\|b\|c\)d", "bbbbbd").is_none());
915 }
916 #[test]
917 fn repeating_groups() {
918 assert!(matches_exact(r"\(a\|b\|c\)*d", "d").is_some());
919 assert!(matches_exact(r"\(a\|b\|c\)*d", "aaaad").is_some());
920 assert!(matches_exact(r"\(a\|b\|c\)*d", "bbbbd").is_some());
921 assert!(matches_exact(r"\(a\|b\|c\)*d", "aabbd").is_some());
922
923 assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "d").is_none());
924 assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "ad").is_some());
925 assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "abd").is_some());
926 assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "abcd").is_none());
927 assert!(matches_exact(r"\(\(a\|b\|c\)\)\{1,2\}d", "abd").is_some());
928 assert!(matches_exact(r"\(\(a\|b\|c\)\)\{1,2\}d", "abcd").is_none());
929 assert!(matches_exact(r"\(\(a\|b\|c\)\{1,2\}\)\{1,2\}d", "abad").is_some());
930 assert!(matches_exact(r"\(\(a\|b\|c\)\{1,2\}\)\{1,2\}d", "ababd").is_some());
931 assert!(matches_exact(r"\(\(a\|b\|c\)\{1,2\}\)\{1,2\}d", "ababad").is_none());
932
933 assert!(matches_exact(r"\(a\|b\|c\)\{4\}d", "ababad").is_none());
934 assert!(matches_exact(r"\(a\|b\|c\)\{4\}d", "ababd").is_some());
935 assert!(matches_exact(r"\(a\|b\|c\)\{4\}d", "abad").is_none());
936
937 assert!(matches_exact(r"\(\([abc]\)\)\{3\}", "abcTRAILING").is_some());
938 assert!(matches_exact(r"\(\([abc]\)\)\{3\}", "abTRAILING").is_none());
939 }
940 #[test]
941 fn backref() {
942 assert!(matches_exact(r"\([abc]\)\1d", "aad").is_some());
943 assert!(matches_exact(r"\([abc]\)\1d", "abd").is_none());
944 assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "abcabcd").is_some());
945 assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "abcbcd").is_none());
946 assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "ababd").is_some());
947 assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "abacd").is_none());
948
949 assert!(matches_exact(r"\([[:alpha:]]\).*\1d", "hellohd").is_some());
950 assert!(matches_exact(r"\([[:alpha:]]\).*\1d", "hellod").is_none());
951 assert!(matches_exact(r"\([[:alpha:]]\).*\1", "hello").is_none());
952 assert!(matches_exact(r"\([[:alpha:]]\).*\1", "helloh").is_some());
953
954 assert!(matches_exact(r"\(\)-\?\1d", "d").is_some());
955 assert!(matches_exact(r"\(\)-\?\1", "").is_some());
956
957 assert!(matches_exact(r"\(\1\)", "a").is_none());
960
961 assert!(matches_exact(r"\(h.\)\1\+!", "hihihi!").is_some());
962 assert!(matches_exact(r"\(h.\)\1\+!", "hehehe!").is_some());
963 assert!(matches_exact(r"\(h.\)\1\+!", "hahehe!").is_none());
964
965 assert!(matches_exact(
966 r"\(hello \(\<.*\>\) \)*how are you \2",
967 "hello world how are you world"
968 )
969 .is_some());
970 assert!(matches_exact(
971 r"\(hello \(\<.*\>\) \)*how are you \2",
972 "hello universe hello world how are you world"
973 )
974 .is_some());
975 assert!(matches_exact(
976 r"\(hello \(\<.*\>\) \)*how are you \2",
977 "hello world hello universe how are you world"
978 )
979 .is_none());
980 }
981 #[test]
982 fn case_insensitive() {
983 assert!(compile(r"abc[de]")
984 .case_insensitive(true)
985 .matches_exact(b"ABCD")
986 .is_some());
987 assert!(compile(r"abc[de]")
988 .case_insensitive(true)
989 .matches_exact(b"ABCF")
990 .is_none());
991 }
992 #[test]
993 fn newline() {
994 assert_eq!(
995 compile(r"^hello$")
996 .newline(true)
997 .matches(b"hi\nhello\ngreetings", None)
998 .len(),
999 1
1000 );
1001 assert!(compile(r"^hello$")
1002 .newline(true)
1003 .matches(b"hi\ngood day\ngreetings", None)
1004 .is_empty());
1005 }
1006 #[test]
1007 fn no_start_end() {
1008 assert!(compile(r"^hello")
1009 .no_start(true)
1010 .matches_exact(b"hello")
1011 .is_none());
1012 assert!(compile(r"hello$")
1013 .no_end(true)
1014 .matches_exact(b"hello")
1015 .is_none());
1016 }
1017
1018 #[cfg(feature = "bench")]
1019 #[bench]
1020 fn speed_matches_exact(b: &mut Bencher) {
1021 b.iter(|| {
1022 assert!(matches_exact(r"\(\(a*\|b\|c\) test\|yee\)", "aaaaa test").is_some());
1023 })
1024 }
1025 #[cfg(feature = "bench")]
1026 #[bench]
1027 fn speed_matches(b: &mut Bencher) {
1028 b.iter(|| {
1029 assert_eq!(
1030 matches(r"\(\(a*\|b\|c\) test\|yee\)", "oooo aaaaa test").len(),
1031 1
1032 );
1033 })
1034 }
1035}