posix_regex/
matcher.rs

1//! The matcher: Can find substrings in a string that match any compiled regex
2
3#[cfg(feature = "no_std")]
4use std::prelude::*;
5
6use std::borrow::Cow;
7use std::collections::HashSet;
8use std::fmt;
9use std::cell::RefCell;
10use std::rc::Rc;
11
12use compile::{Token, Range};
13use ctype;
14use immut_vec::ImmutVec;
15use tree::{*, Node as TreeNode};
16
/// A regex matcher, ready to match stuff
#[derive(Clone)]
pub struct PosixRegex<'a> {
    /// The compiled regex tree, either borrowed or owned.
    tree: Cow<'a, Tree>,
    /// When set, a literal character matches both its upper- and lowercase
    /// representation.
    case_insensitive: bool,
    /// When set, `^`/`$` also accept a position right after/before a `\n`
    /// byte, and `.` as well as inverted bracket expressions refuse `\n`.
    newline: bool,
    /// When set, `^` is not accepted at offset 0 of the input.
    no_start: bool,
    /// When set, `$` is not accepted at the end of the input.
    no_end: bool
}
26impl<'a> PosixRegex<'a> {
27    /// Create a new matcher instance from the specified alternations. This
28    /// should probably not be used and instead an instance should be obtained
29    /// from `PosixRegexBuilder`, which also compiles a string into regex.
30    pub fn new(tree: Cow<'a, Tree>) -> Self {
31        Self {
32            tree,
33            case_insensitive: false,
34            newline: false,
35            no_start: false,
36            no_end: false
37        }
38    }
39    /// Chainable function to enable/disable case insensitivity. Default: false.
40    /// When enabled, single characters match both their upper and lowercase
41    /// representations.
42    pub fn case_insensitive(mut self, value: bool) -> Self {
43        self.case_insensitive = value;
44        self
45    }
46    /// Chainable function to enable/disable newline mode. Default: false.
47    /// When enabled, ^ and $ match newlines as well as start/end.
48    /// This behavior overrides both no_start and no_end.
49    pub fn newline(mut self, value: bool) -> Self {
50        self.newline = value;
51        self
52    }
53    /// Chainable function to enable/disable no_start mode. Default: false.
54    /// When enabled, ^ doesn't actually match the start of a string.
55    pub fn no_start(mut self, value: bool) -> Self {
56        self.no_start = value;
57        self
58    }
59    /// Chainable function to enable/disable no_start mode. Default: false.
60    /// When enabled, $ doesn't actually match the end of a string.
61    pub fn no_end(mut self, value: bool) -> Self {
62        self.no_end = value;
63        self
64    }
65    /// Return the total number of matches that **will** be returned by
66    /// `matches_exact` or in each match in `matches`.
67    pub fn count_groups(&self) -> usize {
68        let mut count = 1;
69        let mut cursor = self.tree[self.tree.root].child;
70        while let Some(node) = cursor {
71            // Walk tree
72            let node = &self.tree[node];
73            if node.child.is_some() {
74                cursor = node.child;
75            } else {
76                let mut node = Some(node);
77                while node.map(|node| node.next_sibling.is_none()).unwrap_or(false) {
78                    node = node.unwrap().parent.map(|node| &self.tree[node]);
79                }
80                cursor = node.and_then(|node| node.next_sibling);
81            }
82
83            // Count groups
84            if let Token::Group(_) = node.token {
85                count += 1;
86            }
87        }
88        count
89    }
90    /// Match the string starting at the current position. This does not find
91    /// substrings.
92    pub fn matches_exact(&self, input: &[u8]) -> Option<Box<[Option<(usize, usize)>]>> {
93        let mut matcher = PosixRegexMatcher {
94            base: self,
95            input,
96            offset: 0,
97            max_groups: self.count_groups()
98        };
99        let internal_prev = RefCell::new(Vec::new());
100        let prev = ImmutVec::new(&internal_prev);
101        let tree = self.tree[self.tree.root].children(&self.tree)
102            .filter_map(|node| self.tree[node].child.map(|child| Node::new(&self.tree, child, prev)))
103            .collect();
104
105        let start = matcher.offset;
106        match matcher.matches_exact(tree) {
107            None => None,
108            Some(mut groups) => {
109                assert_eq!(groups[0], None);
110                groups[0] = Some((start, matcher.offset));
111                Some(groups)
112            }
113        }
114    }
    /// Match any substrings in the string, but optionally no more than `max`
    ///
    /// Returns one boxed slice of capture groups per match, in the order the
    /// matches were found. Slot 0 of each slice is the span of the whole
    /// match, recorded by the synthetic group 0 that wraps the regex below.
    pub fn matches(&self, input: &[u8], mut max: Option<usize>) -> Vec<Box<[Option<(usize, usize)>]>> {
        let mut matcher = PosixRegexMatcher {
            base: self,
            input,
            offset: 0,
            max_groups: self.count_groups()
        };

        // Work on a copy of the node arena: the nodes pushed below only exist
        // for the duration of this call.
        let mut arena = self.tree.arena.to_vec();

        let root = self.tree[self.tree.root].child;

        // Wrap everything in group
        let group_id = NodeId::from(arena.len());
        arena.push(TreeNode {
            token: Token::Group(0),
            range: Range(1, Some(1)),
            parent: None,
            next_sibling: None,
            child: root
        });

        // Update parents
        let mut cursor = root;
        while let Some(node) = cursor {
            let node = &mut arena[usize::from(node)];
            cursor = node.next_sibling;
            node.parent = Some(group_id);
        }

        // Push leading start
        // (an `InternalStart` node repeated 0..unbounded times, followed by
        // the wrapping group; the matcher lets it consume any byte)
        let start_id = NodeId::from(arena.len());
        arena.push(TreeNode {
            token: Token::InternalStart,
            range: Range(0, None),
            parent: None,
            next_sibling: Some(group_id),
            child: None
        });

        let tree = Tree {
            arena: arena.into_boxed_slice(),
            root: start_id
        };
        let internal_prev = RefCell::new(Vec::new());
        let prev = ImmutVec::new(&internal_prev);
        // From here on `tree` is the initial branch list, not the `Tree`.
        let tree = vec![Node::new(&tree, tree.root, prev)];

        let mut matches = Vec::new();
        // Keep matching from wherever the previous match stopped until the
        // budget in `max` is used up or the offset runs past the input.
        while max.map(|max| max > 0).unwrap_or(true) && matcher.offset <= matcher.input.len() {
            match matcher.matches_exact(tree.clone()) {
                Some(groups) => {
                    // An empty match leaves the offset where it was; step one
                    // byte forward so the next round starts somewhere new.
                    if groups[0].unwrap().0 == groups[0].unwrap().1 {
                        matcher.offset += 1;
                    }
                    matches.push(groups)
                },
                None => break
            }
            max = max.map(|max| max - 1);
        }
        matches
    }
179}
180
/// A single "group opened" or "group closed" record in a branch's history.
#[derive(Clone, Copy, Debug)]
struct GroupEvent {
    /// `true` when the group was opened, `false` when it was closed.
    open: bool,
    /// Id of the group (the value carried by `Token::Group`).
    id: usize,
    /// Input offset at which the event was recorded.
    offset: usize
}
/// Progress of a back reference that is being matched against the input.
#[derive(Clone, Copy)]
struct BackRef {
    /// Input offset where the referenced group's text starts.
    offset: usize,
    /// Position inside the referenced text that has to be matched next.
    index: usize,
    /// Length of the referenced text (group end minus group start).
    len: usize
}
193
/// One branch of the matcher: a position in the regex tree together with the
/// state accumulated while getting there.
#[derive(Clone)]
struct Node<'a> {
    /// The regex tree `node` points into.
    tree: &'a Tree,
    /// State of the group this branch was expanded from, if any.
    parent: Option<Rc<Node<'a>>>,
    /// The tree node this branch currently sits on.
    node: NodeId,
    /// History of group open/close events recorded on this branch.
    prev: ImmutVec<'a, GroupEvent>,
    /// How many times the current tree node has been repeated so far.
    repeated: u32,
    /// Back reference progress, set when the current node is a back
    /// reference whose group has both an open and a close event recorded.
    backref: Option<BackRef>
}
203impl<'a> fmt::Debug for Node<'a> {
204    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
205        let mut range = self.node().range;
206        range.0 = range.0.saturating_sub(self.repeated);
207        range.1 = range.1.map(|max| max.saturating_sub(self.repeated));
208        write!(f, "{:?}", (&self.node().token, range))
209    }
210}
211impl<'a> Node<'a> {
212    /// Prepare a new node, such as linking back references
213    fn prepare(mut me: Self) -> Self {
214        me.repeated = 0;
215        me.backref = None;
216        if let Token::BackRef(id) = me.node().token {
217            let mut start = None;
218            let mut end = None;
219            for event in me.prev.iter_rev() {
220                if event.id != id as usize {
221                    continue;
222                }
223                if event.open {
224                    start = Some(event.offset);
225                    break;
226                } else {
227                    end = end.or(Some(event.offset));
228                }
229            }
230            if let (Some(start), Some(end)) = (start, end) {
231                me.backref = Some(BackRef {
232                    offset: start,
233                    index: 0,
234                    len: end - start
235                });
236                if start == end {
237                    // Empty group, mark as repeated enough times
238                    let Range(min, _) = me.node().range;
239                    me.repeated += min;
240                }
241            }
242        }
243        me
244    }
245    /// Create a new node. This is only called from the main function to start each alternative path
246    fn new(tree: &'a Tree, node: NodeId, prev: ImmutVec<'a, GroupEvent>) -> Self {
247        Self::prepare(Self {
248            tree: tree,
249            parent: None,
250            node,
251            prev,
252            repeated: 0,
253            backref: None
254        })
255    }
256    /// Expand this group node into its children
257    fn into_children(mut self, branches: &mut Vec<Node<'a>>, offset: usize) {
258        let id = match self.tree[self.node].token {
259            Token::Group(id) => id,
260            _ => return
261        };
262        self.repeated += 1;
263        let mut parent = Rc::new(self);
264        let mut empty = true;
265        for alternative in parent.tree[parent.node].children(&parent.tree) {
266            if let Some(node) = parent.tree[alternative].child {
267                empty = false;
268                branches.push(Self::prepare(Self {
269                    tree: parent.tree,
270                    parent: Some(Rc::clone(&parent)),
271                    node,
272                    prev: parent.prev.push(GroupEvent {
273                        open: true,
274                        id,
275                        offset,
276                    }),
277                    repeated: 0,
278                    backref: None
279                }));
280            }
281        }
282        if empty {
283            let mut parent = Rc::get_mut(&mut parent).expect("group empty but still there's a dangling reference");
284            for &open in &[true, false] {
285                parent.prev = parent.prev.push(GroupEvent {
286                    open,
287                    id,
288                    offset
289                });
290            }
291            parent.add_branches(branches, offset);
292        }
293    }
294    /// Get the internal token node without additional state metadata
295    fn node(&self) -> &TreeNode {
296        &self.tree[self.node]
297    }
298    /// Get a list of all capturing groups
299    fn get_capturing_groups(&self, max_count: usize, offset: usize) -> Box<[Option<(usize, usize)>]> {
300        let mut prev = self.prev;
301
302        // Close all currently open groups
303        let mut parent = self.node().parent;
304        while let Some(group) = parent {
305            let group = &self.tree[group];
306            parent = group.parent;
307            match group.token {
308                Token::Group(id) => prev = prev.push(GroupEvent {
309                    open: false,
310                    id,
311                    offset
312                }),
313                _ => ()
314            }
315        }
316
317        // Go backwards through the immutable list and add groups
318        let mut groups: Vec<(Option<usize>, Option<usize>)> = vec![(None, None); max_count];
319        for event in prev.iter_rev() {
320            let group = &mut groups[event.id];
321            if event.open {
322                group.0 = group.0.or(Some(event.offset));
323            } else {
324                group.1 = group.1.or(Some(event.offset));
325            }
326        }
327        groups.into_iter()
328            .map(|(start, end)| Some((start?, end?)))
329            .collect::<Vec<_>>()
330            .into_boxed_slice()
331    }
332    /// Increment this branch, such as moving a back reference or increasing the number of times repeated
333    fn increment(&mut self) {
334        if let Some(ref mut backref) = self.backref {
335            backref.index += 1;
336            if backref.index >= backref.len {
337                backref.index = 0;
338                self.repeated += 1;
339            }
340        } else {
341            self.repeated += 1;
342        }
343    }
    /// Add all possible branches from this node, such as the next node or
    /// possibly repeat the parent
    ///
    /// New branches are pushed to `branches`; `offset` is the input offset
    /// recorded in any group close events created along the way.
    fn add_branches(&self, branches: &mut Vec<Node<'a>>, offset: usize) {
        let Range(min, _) = self.node().range;
        if self.backref.map(|backref| backref.index > 0 || self.repeated < min).unwrap_or(false) {
            // Wait for back reference to complete
        } else if let Some(next) = self.node().next_sibling {
            // Simple case: continue with the next sibling, keeping the rest
            // of the state (parent, event history) as it is.
            branches.push(Self::prepare(Self {
                node: next,
                ..self.clone()
            }));
        } else {
            // Last node of its sequence: what follows depends on the group
            // this branch was expanded from. Without one there is nothing to
            // add.
            let parent = match self.parent {
                Some(ref parent) => parent,
                None => return
            };
            let Range(min, _) = parent.node().range;

            // Get list of ids
            // (of every group on the parent chain, innermost first)
            let mut ids = Vec::new();
            {
                let mut parent = Some(parent);
                while let Some(node) = parent {
                    if let Token::Group(id) = node.node().token {
                        ids.push(id);
                    }
                    parent = node.parent.as_ref();
                }
            }

            if parent.repeated >= min {
                // Group is closing, migrate previous & current groups to next.
                // Climb to the nearest ancestor that has a next sibling.
                let mut parent = Some(parent);
                while parent.map(|parent| parent.node().next_sibling.is_none()).unwrap_or(false) {
                    parent = parent.unwrap().parent.as_ref();
                }
                if let Some((node, next)) = parent.and_then(|parent| parent.node().next_sibling.map(|node| (parent, node))) {
                    // Continue on that sibling with the ancestor's state, but
                    // with this branch's history plus a close event for every
                    // id collected above.
                    let clone = (**node).clone();
                    let mut prev = self.prev;
                    for &id in &ids {
                        prev = prev.push(GroupEvent {
                            open: false,
                            id,
                            offset
                        });
                    }
                    branches.push(Self::prepare(Self {
                        node: next,
                        prev,
                        ..clone
                    }));
                }
            }

            // Add repetitions
            // Every group on the parent chain that has not reached its
            // maximum is expanded once more, starting from this branch's
            // history plus the same close events.
            let mut parent = Some(parent);
            while let Some(node) = parent {
                parent = node.parent.as_ref();
                let Range(_, max) = node.node().range;
                if max.map(|max| node.repeated < max).unwrap_or(true) {
                    let mut clone = (**node).clone();
                    let mut prev = self.prev;
                    for &id in &ids {
                        prev = prev.push(GroupEvent {
                            open: false,
                            id,
                            offset
                        });
                    }
                    clone.prev = prev;
                    clone.into_children(branches, offset);
                }
            }
        }
    }
    /// Returns true if this node is the final node in the branch
    ///
    /// That is: the node has been repeated at least its minimum number of
    /// times, and walking up the parent chain never finds a node with a next
    /// sibling nor a group that fails the minimum check below.
    fn is_final(&self) -> bool {
        let Range(min, _) = self.node().range;
        if self.repeated < min {
            return false;
        }

        let mut next = Some(self);
        while let Some(current) = next {
            let mut node = current.node();
            if node.token == Token::Alternative {
                // Don't explore other alternatives
                // (inspect the tree node that owns the alternative instead)
                next = current.parent.as_ref().map(|node| &**node);
                node = &self.tree[node.parent.expect("found root alternative")];
            }
            if let Token::Group(_) = node.token {
                // Compares `current`'s repetition count against the minimum
                // of the group node being inspected.
                let Range(min, _) = node.range;
                if current.repeated < min {
                    return false;
                }
            }
            if node.next_sibling.is_some() {
                // Something still follows; `next` stays set so the final
                // check below reports false.
                break;
            }
            next = current.parent.as_ref().map(|node| &**node);
        }
        // `next` is `None` when the walk ran off the top of the parent chain.
        next.and_then(|node| self.tree[node.node].next_sibling).is_none()
    }
447}
448
/// The state of one matching run over an input.
struct PosixRegexMatcher<'a> {
    /// The regex (tree and options) being matched.
    base: &'a PosixRegex<'a>,
    /// The bytes being matched against.
    input: &'a [u8],
    /// Current position in `input`; advanced as bytes are consumed.
    offset: usize,
    /// Number of capture slots to report, see `PosixRegex::count_groups`.
    max_groups: usize
}
455impl<'a> PosixRegexMatcher<'a> {
456    fn expand<'b>(&mut self, skip: &mut HashSet<NodeId>, branches: &mut [Node<'b>]) -> Vec<Node<'b>> {
457        let mut insert = Vec::new();
458
459        for branch in &mut *branches {
460            if skip.contains(&branch.node) {
461                continue;
462            }
463
464            let node = branch.node();
465
466            if let Token::Group(_) = node.token {
467                branch.clone().into_children(&mut insert, self.offset);
468            }
469
470            let Range(min, _) = node.range;
471            if branch.repeated >= min {
472                // Push the next element as a new branch
473                branch.add_branches(&mut insert, self.offset);
474            }
475        }
476
477        if !insert.is_empty() {
478            for branch in &mut *branches {
479                skip.insert(branch.node);
480            }
481            let mut new = self.expand(skip, &mut insert);
482            insert.append(&mut new);
483        }
484        insert
485    }
486
487    fn matches_exact(&mut self, mut branches: Vec<Node>) -> Option<Box<[Option<(usize, usize)>]>> {
488        // Whether or not any branch, at any point, got fully explored. This
489        // means at least one path of the regex successfully completed!
490        let mut succeeded = None;
491        let mut prev = self.offset.checked_sub(1).and_then(|index| self.input.get(index).cloned());
492
493        let mut set = HashSet::new();
494
495        loop {
496            let next = self.input.get(self.offset).cloned();
497
498            set.clear();
499            let mut insert = self.expand(&mut set, &mut branches);
500            branches.append(&mut insert);
501
502            // Handle zero-width stuff
503            loop {
504                let mut index = 0;
505                let mut remove = 0;
506
507                while index < branches.len() {
508                    if remove > 0 {
509                        branches.swap(index, index-remove);
510                    }
511                    let branch = &mut branches[index-remove];
512                    index += 1;
513
514                    let node = branch.node();
515
516                    match node.token {
517                        Token::End |
518                        Token::Start |
519                        Token::WordEnd |
520                        Token::WordStart => {
521                            let accepts = match node.token {
522                                Token::End =>
523                                    (!self.base.no_end && next.is_none())
524                                        || (self.base.newline && next == Some(b'\n')),
525                                Token::Start =>
526                                    (!self.base.no_start && self.offset == 0)
527                                        || (self.base.newline && prev == Some(b'\n')),
528                                Token::WordEnd => next.map(ctype::is_word_boundary).unwrap_or(true),
529                                Token::WordStart => prev.map(ctype::is_word_boundary).unwrap_or(true),
530                                _ => unreachable!()
531                            };
532                            if accepts {
533                                branch.increment();
534                                branch.add_branches(&mut insert, self.offset);
535                            }
536                            if branch.is_final() {
537                                succeeded = Some(branch.get_capturing_groups(self.max_groups, self.offset));
538                            }
539                            remove += 1;
540                        },
541                        _ => ()
542                    }
543                }
544                branches.truncate(branches.len() - remove);
545
546                if insert.is_empty() {
547                    break;
548                }
549                set.clear();
550                let mut insert2 = self.expand(&mut set, &mut insert);
551                branches.append(&mut insert);
552                branches.append(&mut insert2);
553            }
554
555            let mut index = 0;
556            let mut remove = 0;
557
558            // Handle stuff
559            while index < branches.len() {
560                if remove > 0 {
561                    // Just like Rust's `retain` function, shift all elements I
562                    // want to keep back and `truncate` when I'm done.
563                    branches.swap(index, index-remove);
564                }
565                let branch = &mut branches[index-remove];
566                index += 1;
567
568                let node = branch.node();
569                let Range(_, max) = node.range;
570
571                // Step 3: Check if the token matches
572                let accepts = max.map(|max| branch.repeated < max).unwrap_or(true) && match node.token {
573                    Token::InternalStart => next.is_some(),
574                    Token::Group { .. } => false, // <- content is already expanded and handled
575
576                    Token::Any => next.map(|c| !self.base.newline || c != b'\n').unwrap_or(false),
577                    Token::BackRef(_) => if let Some(ref backref) = branch.backref {
578                        next == Some(self.input[backref.offset + backref.index])
579                    } else { false },
580                    Token::Char(c) => if self.base.case_insensitive {
581                        next.map(|c2| c & !32 == c2 & !32).unwrap_or(false)
582                    } else {
583                        next == Some(c)
584                    },
585                    Token::OneOf { invert, ref list } => if let Some(next) = next {
586                        (!invert || !self.base.newline || next != b'\n')
587                        && list.iter().any(|c| c.matches(next, self.base.case_insensitive)) == !invert
588                    } else { false },
589
590                    Token::Alternative
591                    | Token::End
592                    | Token::Root
593                    | Token::Start
594                    | Token::WordEnd
595                    | Token::WordStart => unreachable!()
596                };
597
598                if accepts {
599                    branch.increment();
600                } else {
601                    if branch.is_final() {
602                        let groups = branch.get_capturing_groups(self.max_groups, self.offset);
603
604                        let mut add = true;
605                        if let Some((new_start, new_end)) = groups[0] {
606                            if let Some(previous) = succeeded.as_ref() {
607                                if let Some((prev_start, prev_end)) = previous[0] {
608                                    if new_end - new_start <= prev_end - prev_start {
609                                        add = false;
610                                    }
611                                }
612                            }
613                        }
614                        if add {
615                            succeeded = Some(groups);
616                        }
617                    }
618                    remove += 1;
619                }
620            }
621            let end = branches.len() - remove;
622            branches.truncate(end);
623
624            if branches.is_empty() ||
625                    // The internal start thing is lazy, not greedy:
626                    (succeeded.is_some() && branches.iter().all(|t| t.node().token == Token::InternalStart)) {
627                return succeeded;
628            }
629
630            if next.is_some() {
631                self.offset += 1;
632                prev = next;
633            }
634        }
635    }
636}
637
638#[cfg(test)]
639mod tests {
640    #[cfg(feature = "bench")]
641    extern crate test;
642
643    #[cfg(feature = "bench")]
644    use self::test::Bencher;
645
646    use super::*;
647    use ::PosixRegexBuilder;
648
    // FIXME: Workaround to coerce a Box<[T; N]> into a Box<[T]>. Use type
    // ascription when stabilized.
    //
    // The function is the identity. Its only job is to give the argument the
    // declared parameter type `Box<[T]>`, so that a boxed array passed in (as
    // `abox!` does) is coerced to a boxed slice at the call site.
    fn boxed_slice<T>(slice: Box<[T]>) -> Box<[T]> {
        slice
    }
654
    // Build a `Box<[T]>` from a comma separated list of items: the items are
    // boxed as an array and handed to `boxed_slice`.
    macro_rules! abox {
        ($($item:expr),*) => {
            boxed_slice(Box::new([$($item),*]))
        }
    }
660
661    fn compile(regex: &str) -> PosixRegex {
662        PosixRegexBuilder::new(regex.as_bytes())
663            .with_default_classes()
664            .compile()
665            .expect("error compiling regex")
666    }
667    fn matches(regex: &str, input: &str) -> Vec<Box<[Option<(usize, usize)>]>> {
668        compile(regex)
669            .matches(input.as_bytes(), None)
670    }
671    fn matches_exact(regex: &str, input: &str) -> Option<Box<[Option<(usize, usize)>]>> {
672        compile(regex)
673            .matches_exact(input.as_bytes())
674    }
675
676    #[test]
677    fn basic() {
678        assert!(matches_exact("abc", "abc").is_some());
679        assert!(matches_exact("abc", "bbc").is_none());
680        assert!(matches_exact("abc", "acc").is_none());
681        assert!(matches_exact("abc", "abd").is_none());
682    }
683    #[test]
684    fn repetitions() {
685        assert!(matches_exact("abc*", "ab").is_some());
686        assert!(matches_exact("abc*", "abc").is_some());
687        assert!(matches_exact("abc*", "abccc").is_some());
688
689        assert!(matches_exact(r"a\{1,2\}b", "b").is_none());
690        assert!(matches_exact(r"a\{1,2\}b", "ab").is_some());
691        assert!(matches_exact(r"a\{1,2\}b", "aab").is_some());
692        assert!(matches_exact(r"a\{1,2\}b", "aaab").is_none());
693
694        assert!(matches_exact(r"[abc]\{3\}", "abcTRAILING").is_some());
695        assert!(matches_exact(r"[abc]\{3\}", "abTRAILING").is_none());
696    }
697    #[test]
698    fn any() {
699        assert!(matches_exact(".*", "").is_some());
700        assert!(matches_exact(".*b", "b").is_some());
701        assert!(matches_exact(".*b", "ab").is_some());
702        assert!(matches_exact(".*b", "aaaaab").is_some());
703        assert!(matches_exact(".*b", "HELLO WORLD").is_none());
704        assert!(matches_exact(".*b", "HELLO WORLDb").is_some());
705        assert!(matches_exact("H.*O WORLD", "HELLO WORLD").is_some());
706        assert!(matches_exact("H.*ORLD", "HELLO WORLD").is_some());
707    }
708    #[test]
709    fn brackets() {
710        assert!(matches_exact("[abc]*d", "abcd").is_some());
711        assert!(matches_exact("[0-9]*d", "1234d").is_some());
712        assert!(matches_exact("[[:digit:]]*d", "1234d").is_some());
713        assert!(matches_exact("[[:digit:]]*d", "abcd").is_none());
714    }
715    #[test]
716    fn alternations() {
717        assert!(matches_exact(r"abc\|bcd", "abc").is_some());
718        assert!(matches_exact(r"abc\|bcd", "bcd").is_some());
719        assert!(matches_exact(r"abc\|bcd", "cde").is_none());
720        assert!(matches_exact(r"[A-Z]\+\|yee", "").is_none());
721        assert!(matches_exact(r"[A-Z]\+\|yee", "HELLO").is_some());
722        assert!(matches_exact(r"[A-Z]\+\|yee", "yee").is_some());
723        assert!(matches_exact(r"[A-Z]\+\|yee", "hello").is_none());
724    }
725    #[test]
726    fn offsets() {
727        assert_eq!(
728            matches_exact("abc", "abcd"),
729            Some(abox![Some((0, 3))])
730        );
731        assert_eq!(
732            matches_exact(r"[[:alpha:]]\+", "abcde12345"),
733            Some(abox![Some((0, 5))])
734        );
735        assert_eq!(
736            matches_exact(r"a\(bc\)\+d", "abcbcd"),
737            Some(abox![Some((0, 6)), Some((3, 5))])
738        );
739        assert_eq!(
740            matches_exact(r"hello\( \(world\|universe\) :D\)\?!", "hello world :D!"),
741            Some(abox![Some((0, 15)), Some((5, 14)), Some((6, 11))])
742        );
743        assert_eq!(
744            matches_exact(r"hello\( \(world\|universe\) :D\)\?", "hello world :D"),
745            Some(abox![Some((0, 14)), Some((5, 14)), Some((6, 11))])
746        );
747        assert_eq!(
748            matches_exact(r"\(\<hello\>\) world", "hello world"),
749            Some(abox![Some((0, 11)), Some((0, 5))])
750        );
751        assert_eq!(
752            matches_exact(r".*d", "hid howd ared youd"),
753            Some(abox![Some((0, 18))])
754        );
755        assert_eq!(
756            matches_exact(r".*\(a\)", "bbbbba"),
757            Some(abox![Some((0, 6)), Some((5, 6))])
758        );
759        assert_eq!(
760            matches_exact(r"\(a \(b\) \(c\)\) \(d\)", "a b c d"),
761            Some(abox![Some((0, 7)), Some((0, 5)), Some((2, 3)), Some((4, 5)), Some((6, 7))])
762        );
763        assert_eq!(
764            matches_exact(r"\(.\)*", "hello"),
765            Some(abox![Some((0, 5)), Some((4, 5))])
766        );
767        assert_eq!(
768            matches(r"h\(i\)", "hello hi lol"),
769            vec![abox![Some((6, 8)), Some((7, 8))]]
770        );
771        assert_eq!(
772            matches_exact(r"\(\([[:alpha:]]\)*\)", "abcdefg"),
773            Some(abox![Some((0, 7)), Some((0, 7)), Some((6, 7))])
774        );
775        assert_eq!(
776            matches_exact(r"\(\.\([[:alpha:]]\)\)*", ".a.b.c.d.e.f.g"),
777            Some(abox![Some((0, 14)), Some((12, 14)), Some((13, 14))])
778        );
779        assert_eq!(
780            matches_exact(r"\(a\|\(b\)\)*\(c\)", "bababac"),
781            Some(abox![Some((0, 7)), Some((5, 6)), Some((4, 5)), Some((6, 7))])
782        );
783        assert_eq!(
784            matches_exact(r"\(a\|\(b\)\)*\(c\)", "aaac"),
785            Some(abox![Some((0, 4)), Some((2, 3)), None, Some((3, 4))])
786        );
787        assert_eq!(
788            matches_exact(r"a\(\)bc", "abc"),
789            Some(abox![Some((0, 3)), Some((1, 1))])
790        );
791    }
792    #[test]
793    fn matches_is_lazy() {
794        assert_eq!(
795            matches(r"\(hi\)\+", "hello hihi kek"),
796            vec![abox![Some((6, 10)), Some((8, 10))]]
797        );
798        assert_eq!(
799            matches(r"o\+", "helloooooooo woooorld, hooow are you?"),
800            vec![abox![Some((4, 12))], abox![Some((14, 18))], abox![Some((24, 27))], abox![Some((34, 35))]]
801        );
802        assert_eq!(
803            matches(r"z*", "abc"),
804            vec![abox![Some((0, 0))], abox![Some((1, 1))], abox![Some((2, 2))], abox![Some((3, 3))]]
805        );
806    }
807    #[test]
808    fn start_and_end() {
809        assert!(matches_exact("^abc$", "abc").is_some());
810        assert!(matches_exact("^bcd", "bcde").is_some());
811        assert!(matches_exact("^bcd", "abcd").is_none());
812        assert!(matches_exact("abc$", "abc").is_some());
813        assert!(matches_exact("abc$", "abcd").is_none());
814
815        assert!(matches_exact(r".*\(^\|a\)c", "c").is_some());
816        assert!(matches_exact(r".*\(^\|a\)c", "ac").is_some());
817        assert!(matches_exact(r".*\(^\|a\)c", "bc").is_none());
818
819        // Tests if ^ can be repeated without issues
820        assert!(matches_exact(".*^^a", "helloabc").is_none());
821        assert!(matches_exact(".*^^a", "abc").is_some());
822    }
823    #[test]
824    fn word_boundaries() {
825        assert!(matches_exact(r"hello\>.world", "hello world").is_some());
826        assert!(matches_exact(r"hello\>.world", "hello!world").is_some());
827        assert!(matches_exact(r"hello\>.world", "hellooworld").is_none());
828
829        assert!(matches_exact(r"hello.\<world", "hello world").is_some());
830        assert!(matches_exact(r"hello.\<world", "hello!world").is_some());
831        assert!(matches_exact(r"hello.\<world", "hellooworld").is_none());
832
833        assert!(matches_exact(r".*\<hello\>", "hihello").is_none());
834        assert!(matches_exact(r".*\<hello\>", "hi_hello").is_none());
835        assert!(matches_exact(r".*\<hello\>", "hi hello").is_some());
836    }
837    #[test]
838    fn groups() {
839        assert!(matches_exact(r"\(a*\)*", "aaaaa").is_some());
840        assert!(matches_exact(r"\(hello\) world", "hello world").is_some());
841        assert!(matches_exact(r"\(a*\|b\|c\)d", "d").is_some());
842        assert!(matches_exact(r"\(a*\|b\|c\)d", "aaaad").is_some());
843        assert!(matches_exact(r"\(a*\|b\|c\)d", "bd").is_some());
844        assert!(matches_exact(r"\(a*\|b\|c\)d", "bbbbbd").is_none());
845    }
846    #[test]
847    fn repeating_groups() {
848        assert!(matches_exact(r"\(a\|b\|c\)*d", "d").is_some());
849        assert!(matches_exact(r"\(a\|b\|c\)*d", "aaaad").is_some());
850        assert!(matches_exact(r"\(a\|b\|c\)*d", "bbbbd").is_some());
851        assert!(matches_exact(r"\(a\|b\|c\)*d", "aabbd").is_some());
852
853        assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "d").is_none());
854        assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "ad").is_some());
855        assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "abd").is_some());
856        assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "abcd").is_none());
857        assert!(matches_exact(r"\(\(a\|b\|c\)\)\{1,2\}d", "abd").is_some());
858        assert!(matches_exact(r"\(\(a\|b\|c\)\)\{1,2\}d", "abcd").is_none());
859        assert!(matches_exact(r"\(\(a\|b\|c\)\{1,2\}\)\{1,2\}d", "abad").is_some());
860        assert!(matches_exact(r"\(\(a\|b\|c\)\{1,2\}\)\{1,2\}d", "ababd").is_some());
861        assert!(matches_exact(r"\(\(a\|b\|c\)\{1,2\}\)\{1,2\}d", "ababad").is_none());
862
863        assert!(matches_exact(r"\(a\|b\|c\)\{4\}d", "ababad").is_none());
864        assert!(matches_exact(r"\(a\|b\|c\)\{4\}d", "ababd").is_some());
865        assert!(matches_exact(r"\(a\|b\|c\)\{4\}d", "abad").is_none());
866
867        assert!(matches_exact(r"\(\([abc]\)\)\{3\}", "abcTRAILING").is_some());
868        assert!(matches_exact(r"\(\([abc]\)\)\{3\}", "abTRAILING").is_none());
869    }
870    #[test]
871    fn backref() {
872        assert!(matches_exact(r"\([abc]\)\1d", "aad").is_some());
873        assert!(matches_exact(r"\([abc]\)\1d", "abd").is_none());
874        assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "abcabcd").is_some());
875        assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "abcbcd").is_none());
876        assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "ababd").is_some());
877        assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "abacd").is_none());
878
879        assert!(matches_exact(r"\([[:alpha:]]\).*\1d", "hellohd").is_some());
880        assert!(matches_exact(r"\([[:alpha:]]\).*\1d", "hellod").is_none());
881        assert!(matches_exact(r"\([[:alpha:]]\).*\1", "hello").is_none());
882        assert!(matches_exact(r"\([[:alpha:]]\).*\1", "helloh").is_some());
883
884        assert!(matches_exact(r"\(\)-\?\1d", "d").is_some());
885        assert!(matches_exact(r"\(\)-\?\1", "").is_some());
886
887        // Just make sure this doesn't crash it (even though it should error
888        // but I'm too lazy)
889        assert!(matches_exact(r"\(\1\)", "a").is_none());
890
891        assert!(matches_exact(r"\(h.\)\1\+!", "hihihi!").is_some());
892        assert!(matches_exact(r"\(h.\)\1\+!", "hehehe!").is_some());
893        assert!(matches_exact(r"\(h.\)\1\+!", "hahehe!").is_none());
894
895        assert!(matches_exact(
896            r"\(hello \(\<.*\>\) \)*how are you \2",
897            "hello world how are you world"
898        ).is_some());
899        assert!(matches_exact(
900            r"\(hello \(\<.*\>\) \)*how are you \2",
901            "hello universe hello world how are you world"
902        ).is_some());
903        assert!(matches_exact(
904            r"\(hello \(\<.*\>\) \)*how are you \2",
905            "hello world hello universe how are you world"
906        ).is_none());
907    }
908    #[test]
909    fn case_insensitive() {
910        assert!(compile(r"abc[de]")
911            .case_insensitive(true)
912            .matches_exact(b"ABCD")
913            .is_some());
914        assert!(compile(r"abc[de]")
915            .case_insensitive(true)
916            .matches_exact(b"ABCF")
917            .is_none());
918    }
919    #[test]
920    fn newline() {
921        assert_eq!(compile(r"^hello$")
922            .newline(true)
923            .matches(b"hi\nhello\ngreetings", None)
924            .len(), 1);
925        assert!(compile(r"^hello$")
926            .newline(true)
927            .matches(b"hi\ngood day\ngreetings", None)
928            .is_empty());
929    }
930    #[test]
931    fn no_start_end() {
932        assert!(compile(r"^hello")
933            .no_start(true)
934            .matches_exact(b"hello")
935            .is_none());
936        assert!(compile(r"hello$")
937            .no_end(true)
938            .matches_exact(b"hello")
939            .is_none());
940    }
941
942    #[cfg(feature = "bench")]
943    #[bench]
944    fn speed_matches_exact(b: &mut Bencher) {
945        b.iter(|| {
946            assert!(matches_exact(r"\(\(a*\|b\|c\) test\|yee\)", "aaaaa test").is_some());
947        })
948    }
949    #[cfg(feature = "bench")]
950    #[bench]
951    fn speed_matches(b: &mut Bencher) {
952        b.iter(|| {
953            assert_eq!(matches(r"\(\(a*\|b\|c\) test\|yee\)", "oooo aaaaa test").len(), 1);
954        })
955    }
956}