posix_regex/
matcher.rs

1//! The matcher: Can find substrings in a string that match any compiled regex
2
3#[cfg(feature = "no_std")]
4use alloc::collections::BTreeSet as HashSet;
5
6#[cfg(not(feature = "no_std"))]
7use std::collections::HashSet;
8
9use alloc::{borrow::Cow, boxed::Box, rc::Rc, vec, vec::Vec};
10use core::{cell::RefCell, fmt};
11
12use crate::{
13    compile::{Range, Token},
14    ctype,
15    immut_vec::ImmutVec,
16    tree::{Node as TreeNode, *},
17};
18
19/// A regex matcher, ready to match stuff
20#[derive(Clone)]
21pub struct PosixRegex<'a> {
22    tree: Cow<'a, Tree>,
23    case_insensitive: bool,
24    newline: bool,
25    no_start: bool,
26    no_end: bool,
27}
28impl<'a> PosixRegex<'a> {
29    /// Create a new matcher instance from the specified alternations. This
30    /// should probably not be used and instead an instance should be obtained
31    /// from `PosixRegexBuilder`, which also compiles a string into regex.
32    pub fn new(tree: Cow<'a, Tree>) -> Self {
33        Self {
34            tree,
35            case_insensitive: false,
36            newline: false,
37            no_start: false,
38            no_end: false,
39        }
40    }
41    /// Chainable function to enable/disable case insensitivity. Default: false.
42    /// When enabled, single characters match both their upper and lowercase
43    /// representations.
44    pub fn case_insensitive(mut self, value: bool) -> Self {
45        self.case_insensitive = value;
46        self
47    }
48    /// Chainable function to enable/disable newline mode. Default: false.
49    /// When enabled, ^ and $ match newlines as well as start/end.
50    /// This behavior overrides both no_start and no_end.
51    pub fn newline(mut self, value: bool) -> Self {
52        self.newline = value;
53        self
54    }
55    /// Chainable function to enable/disable no_start mode. Default: false.
56    /// When enabled, ^ doesn't actually match the start of a string.
57    pub fn no_start(mut self, value: bool) -> Self {
58        self.no_start = value;
59        self
60    }
61    /// Chainable function to enable/disable no_start mode. Default: false.
62    /// When enabled, $ doesn't actually match the end of a string.
63    pub fn no_end(mut self, value: bool) -> Self {
64        self.no_end = value;
65        self
66    }
67    /// Return the total number of matches that **will** be returned by
68    /// `matches_exact` or in each match in `matches`.
69    pub fn count_groups(&self) -> usize {
70        let mut count = 1;
71        let mut cursor = self.tree[self.tree.root].child;
72        while let Some(node) = cursor {
73            // Walk tree
74            let node = &self.tree[node];
75            if node.child.is_some() {
76                cursor = node.child;
77            } else {
78                let mut node = Some(node);
79                while node
80                    .map(|node| node.next_sibling.is_none())
81                    .unwrap_or(false)
82                {
83                    node = node.unwrap().parent.map(|node| &self.tree[node]);
84                }
85                cursor = node.and_then(|node| node.next_sibling);
86            }
87
88            // Count groups
89            if let Token::Group(_) = node.token {
90                count += 1;
91            }
92        }
93        count
94    }
95    /// Match the string starting at the current position. This does not find
96    /// substrings.
97    #[allow(clippy::type_complexity)]
98    pub fn matches_exact(&self, input: &[u8]) -> Option<Box<[Option<(usize, usize)>]>> {
99        let mut matcher = PosixRegexMatcher {
100            base: self,
101            input,
102            offset: 0,
103            max_groups: self.count_groups(),
104        };
105        let internal_prev = RefCell::new(Vec::new());
106        let prev = ImmutVec::new(&internal_prev);
107        let tree = self.tree[self.tree.root]
108            .children(&self.tree)
109            .filter_map(|node| {
110                self.tree[node]
111                    .child
112                    .map(|child| Node::new(&self.tree, child, prev))
113            })
114            .collect();
115
116        let start = matcher.offset;
117        match matcher.matches_exact(tree) {
118            None => None,
119            Some(mut groups) => {
120                assert_eq!(groups[0], None);
121                groups[0] = Some((start, matcher.offset));
122                Some(groups)
123            }
124        }
125    }
126    /// Match any substrings in the string, but optionally no more than `max`
127    #[allow(clippy::type_complexity)]
128    pub fn matches(
129        &self,
130        input: &[u8],
131        mut max: Option<usize>,
132    ) -> Vec<Box<[Option<(usize, usize)>]>> {
133        let mut matcher = PosixRegexMatcher {
134            base: self,
135            input,
136            offset: 0,
137            max_groups: self.count_groups(),
138        };
139
140        let mut arena = self.tree.arena.to_vec();
141
142        let root = self.tree[self.tree.root].child;
143
144        // Wrap everything in group
145        let group_id = NodeId::from(arena.len());
146        arena.push(TreeNode {
147            token: Token::Group(0),
148            range: Range(1, Some(1)),
149            parent: None,
150            next_sibling: None,
151            child: root,
152        });
153
154        // Update parents
155        let mut cursor = root;
156        while let Some(node) = cursor {
157            let node = &mut arena[usize::from(node)];
158            cursor = node.next_sibling;
159            node.parent = Some(group_id);
160        }
161
162        // Push leading start
163        let start_id = NodeId::from(arena.len());
164        arena.push(TreeNode {
165            token: Token::InternalStart,
166            range: Range(0, None),
167            parent: None,
168            next_sibling: Some(group_id),
169            child: None,
170        });
171
172        let tree = Tree {
173            arena: arena.into_boxed_slice(),
174            root: start_id,
175        };
176        let internal_prev = RefCell::new(Vec::new());
177        let prev = ImmutVec::new(&internal_prev);
178        let tree = vec![Node::new(&tree, tree.root, prev)];
179
180        let mut matches = Vec::new();
181        while max.map(|max| max > 0).unwrap_or(true) && matcher.offset <= matcher.input.len() {
182            match matcher.matches_exact(tree.clone()) {
183                Some(groups) => {
184                    if groups[0].unwrap().0 == groups[0].unwrap().1 {
185                        matcher.offset += 1;
186                    }
187                    matches.push(groups)
188                }
189                None => break,
190            }
191            max = max.map(|max| max - 1);
192        }
193        matches
194    }
195}
196
197#[derive(Clone, Copy, Debug)]
198struct GroupEvent {
199    open: bool,
200    id: usize,
201    offset: usize,
202}
203#[derive(Clone, Copy)]
204struct BackRef {
205    offset: usize,
206    index: usize,
207    len: usize,
208}
209
210#[derive(Clone)]
211struct Node<'a> {
212    tree: &'a Tree,
213    parent: Option<Rc<Node<'a>>>,
214    node: NodeId,
215    prev: ImmutVec<'a, GroupEvent>,
216    repeated: u32,
217    backref: Option<BackRef>,
218}
219impl fmt::Debug for Node<'_> {
220    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
221        let mut range = self.node().range;
222        range.0 = range.0.saturating_sub(self.repeated);
223        range.1 = range.1.map(|max| max.saturating_sub(self.repeated));
224        write!(f, "{:?}", (&self.node().token, range))
225    }
226}
227impl<'a> Node<'a> {
228    /// Prepare a new node, such as linking back references
229    fn prepare(mut me: Self) -> Self {
230        me.repeated = 0;
231        me.backref = None;
232        if let Token::BackRef(id) = me.node().token {
233            let mut start = None;
234            let mut end = None;
235            for event in me.prev.iter_rev() {
236                if event.id != id as usize {
237                    continue;
238                }
239                if event.open {
240                    start = Some(event.offset);
241                    break;
242                } else {
243                    end = end.or(Some(event.offset));
244                }
245            }
246            if let (Some(start), Some(end)) = (start, end) {
247                me.backref = Some(BackRef {
248                    offset: start,
249                    index: 0,
250                    len: end - start,
251                });
252                if start == end {
253                    // Empty group, mark as repeated enough times
254                    let Range(min, _) = me.node().range;
255                    me.repeated += min;
256                }
257            }
258        }
259        me
260    }
261    /// Create a new node. This is only called from the main function to start each alternative path
262    fn new(tree: &'a Tree, node: NodeId, prev: ImmutVec<'a, GroupEvent>) -> Self {
263        Self::prepare(Self {
264            tree,
265            parent: None,
266            node,
267            prev,
268            repeated: 0,
269            backref: None,
270        })
271    }
272    /// Expand this group node into its children
273    fn into_children(mut self, branches: &mut Vec<Node<'a>>, offset: usize) {
274        let id = match self.tree[self.node].token {
275            Token::Group(id) => id,
276            _ => return,
277        };
278        self.repeated += 1;
279        let mut parent = Rc::new(self);
280        let mut empty = true;
281        for alternative in parent.tree[parent.node].children(parent.tree) {
282            if let Some(node) = parent.tree[alternative].child {
283                empty = false;
284                branches.push(Self::prepare(Self {
285                    tree: parent.tree,
286                    parent: Some(Rc::clone(&parent)),
287                    node,
288                    prev: parent.prev.push(GroupEvent {
289                        open: true,
290                        id,
291                        offset,
292                    }),
293                    repeated: 0,
294                    backref: None,
295                }));
296            }
297        }
298        if empty {
299            let parent = Rc::get_mut(&mut parent)
300                .expect("group empty but still there's a dangling reference");
301            for &open in &[true, false] {
302                parent.prev = parent.prev.push(GroupEvent { open, id, offset });
303            }
304            parent.add_branches(branches, offset);
305        }
306    }
307    /// Get the internal token node without additional state metadata
308    fn node(&self) -> &TreeNode {
309        &self.tree[self.node]
310    }
311    /// Get a list of all capturing groups
312    fn get_capturing_groups(
313        &self,
314        max_count: usize,
315        offset: usize,
316    ) -> Box<[Option<(usize, usize)>]> {
317        let mut prev = self.prev;
318
319        // Close all currently open groups
320        let mut parent = self.node().parent;
321        while let Some(group) = parent {
322            let group = &self.tree[group];
323            parent = group.parent;
324            if let Token::Group(id) = group.token {
325                prev = prev.push(GroupEvent {
326                    open: false,
327                    id,
328                    offset,
329                })
330            }
331        }
332
333        // Go backwards through the immutable list and add groups
334        let mut groups: Vec<(Option<usize>, Option<usize>)> = vec![(None, None); max_count];
335        for event in prev.iter_rev() {
336            let group = &mut groups[event.id];
337            if event.open {
338                group.0 = group.0.or(Some(event.offset));
339            } else {
340                group.1 = group.1.or(Some(event.offset));
341            }
342        }
343        groups
344            .into_iter()
345            .map(|(start, end)| Some((start?, end?)))
346            .collect::<Vec<_>>()
347            .into_boxed_slice()
348    }
349    /// Increment this branch, such as moving a back reference or increasing the number of times repeated
350    fn increment(&mut self) {
351        if let Some(ref mut backref) = self.backref {
352            backref.index += 1;
353            if backref.index >= backref.len {
354                backref.index = 0;
355                self.repeated += 1;
356            }
357        } else {
358            self.repeated += 1;
359        }
360    }
361    /// Add all possible branches from this node, such as the next node or
362    /// possibly repeat the parent
363    fn add_branches(&self, branches: &mut Vec<Node<'a>>, offset: usize) {
364        let Range(min, _) = self.node().range;
365        if self
366            .backref
367            .map(|backref| backref.index > 0 || self.repeated < min)
368            .unwrap_or(false)
369        {
370            // Wait for back reference to complete
371        } else if let Some(next) = self.node().next_sibling {
372            branches.push(Self::prepare(Self {
373                node: next,
374                ..self.clone()
375            }));
376        } else {
377            let parent = match self.parent {
378                Some(ref parent) => parent,
379                None => return,
380            };
381            let Range(min, _) = parent.node().range;
382
383            // Get list of ids
384            let mut ids = Vec::new();
385            {
386                let mut parent = Some(parent);
387                while let Some(node) = parent {
388                    if let Token::Group(id) = node.node().token {
389                        ids.push(id);
390                    }
391                    parent = node.parent.as_ref();
392                }
393            }
394
395            if parent.repeated >= min {
396                // Group is closing, migrate previous & current groups to next.
397                let mut parent = Some(parent);
398                while parent
399                    .map(|parent| parent.node().next_sibling.is_none())
400                    .unwrap_or(false)
401                {
402                    parent = parent.unwrap().parent.as_ref();
403                }
404                if let Some((node, next)) =
405                    parent.and_then(|parent| parent.node().next_sibling.map(|node| (parent, node)))
406                {
407                    let clone = (**node).clone();
408                    let mut prev = self.prev;
409                    for &id in &ids {
410                        prev = prev.push(GroupEvent {
411                            open: false,
412                            id,
413                            offset,
414                        });
415                    }
416                    branches.push(Self::prepare(Self {
417                        node: next,
418                        prev,
419                        ..clone
420                    }));
421                }
422            }
423
424            // Add repetitions
425            let mut parent = Some(parent);
426            while let Some(node) = parent {
427                parent = node.parent.as_ref();
428                let Range(_, max) = node.node().range;
429                if max.map(|max| node.repeated < max).unwrap_or(true) {
430                    let mut clone = (**node).clone();
431                    let mut prev = self.prev;
432                    for &id in &ids {
433                        prev = prev.push(GroupEvent {
434                            open: false,
435                            id,
436                            offset,
437                        });
438                    }
439                    clone.prev = prev;
440                    clone.into_children(branches, offset);
441                }
442            }
443        }
444    }
445    /// Returns true if this node is the final node in the branch
446    fn is_final(&self) -> bool {
447        let Range(min, _) = self.node().range;
448        if self.repeated < min {
449            return false;
450        }
451
452        let mut next = Some(self);
453        while let Some(current) = next {
454            let mut node = current.node();
455            if node.token == Token::Alternative {
456                // Don't explore other alternatives
457                next = current.parent.as_deref();
458                node = &self.tree[node.parent.expect("found root alternative")];
459            }
460            if let Token::Group(_) = node.token {
461                let Range(min, _) = node.range;
462                if current.repeated < min {
463                    return false;
464                }
465            }
466            if node.next_sibling.is_some() {
467                break;
468            }
469            next = current.parent.as_deref();
470        }
471        next.and_then(|node| self.tree[node.node].next_sibling)
472            .is_none()
473    }
474}
475
476struct PosixRegexMatcher<'a> {
477    base: &'a PosixRegex<'a>,
478    input: &'a [u8],
479    offset: usize,
480    max_groups: usize,
481}
482impl PosixRegexMatcher<'_> {
483    fn expand<'b>(
484        &mut self,
485        skip: &mut HashSet<NodeId>,
486        branches: &mut [Node<'b>],
487    ) -> Vec<Node<'b>> {
488        let mut insert = Vec::new();
489
490        for branch in &mut *branches {
491            if skip.contains(&branch.node) {
492                continue;
493            }
494
495            let node = branch.node();
496
497            if let Token::Group(_) = node.token {
498                branch.clone().into_children(&mut insert, self.offset);
499            }
500
501            let Range(min, _) = node.range;
502            if branch.repeated >= min {
503                // Push the next element as a new branch
504                branch.add_branches(&mut insert, self.offset);
505            }
506        }
507
508        if !insert.is_empty() {
509            for branch in &mut *branches {
510                skip.insert(branch.node);
511            }
512            let mut new = self.expand(skip, &mut insert);
513            insert.append(&mut new);
514        }
515        insert
516    }
517
518    #[allow(clippy::type_complexity)]
519    fn matches_exact(&mut self, mut branches: Vec<Node>) -> Option<Box<[Option<(usize, usize)>]>> {
520        // Whether or not any branch, at any point, got fully explored. This
521        // means at least one path of the regex successfully completed!
522        let mut succeeded = None;
523        let mut prev = self
524            .offset
525            .checked_sub(1)
526            .and_then(|index| self.input.get(index).cloned());
527
528        let mut set = HashSet::new();
529
530        loop {
531            let next = self.input.get(self.offset).cloned();
532
533            set.clear();
534            let mut insert = self.expand(&mut set, &mut branches);
535            branches.append(&mut insert);
536
537            // Handle zero-width stuff
538            loop {
539                let mut index = 0;
540                let mut remove = 0;
541
542                while index < branches.len() {
543                    if remove > 0 {
544                        branches.swap(index, index - remove);
545                    }
546                    let branch = &mut branches[index - remove];
547                    index += 1;
548
549                    let node = branch.node();
550
551                    match node.token {
552                        Token::End | Token::Start | Token::WordEnd | Token::WordStart => {
553                            let accepts = match node.token {
554                                Token::End => {
555                                    (!self.base.no_end && next.is_none())
556                                        || (self.base.newline && next == Some(b'\n'))
557                                }
558                                Token::Start => {
559                                    (!self.base.no_start && self.offset == 0)
560                                        || (self.base.newline && prev == Some(b'\n'))
561                                }
562                                Token::WordEnd => next.map(ctype::is_word_boundary).unwrap_or(true),
563                                Token::WordStart => {
564                                    prev.map(ctype::is_word_boundary).unwrap_or(true)
565                                }
566                                _ => unreachable!(),
567                            };
568                            if accepts {
569                                branch.increment();
570                                branch.add_branches(&mut insert, self.offset);
571                            }
572                            if branch.is_final() {
573                                succeeded =
574                                    Some(branch.get_capturing_groups(self.max_groups, self.offset));
575                            }
576                            remove += 1;
577                        }
578                        _ => (),
579                    }
580                }
581                branches.truncate(branches.len() - remove);
582
583                if insert.is_empty() {
584                    break;
585                }
586                set.clear();
587                let mut insert2 = self.expand(&mut set, &mut insert);
588                branches.append(&mut insert);
589                branches.append(&mut insert2);
590            }
591
592            let mut index = 0;
593            let mut remove = 0;
594
595            // Handle stuff
596            while index < branches.len() {
597                if remove > 0 {
598                    // Just like Rust's `retain` function, shift all elements I
599                    // want to keep back and `truncate` when I'm done.
600                    branches.swap(index, index - remove);
601                }
602                let branch = &mut branches[index - remove];
603                index += 1;
604
605                let node = branch.node();
606                let Range(_, max) = node.range;
607
608                // Step 3: Check if the token matches
609                let accepts = max.map(|max| branch.repeated < max).unwrap_or(true)
610                    && match node.token {
611                        Token::InternalStart => next.is_some(),
612                        Token::Group { .. } => false, // <- content is already expanded and handled
613
614                        Token::Any => next
615                            .map(|c| !self.base.newline || c != b'\n')
616                            .unwrap_or(false),
617                        Token::BackRef(_) => {
618                            if let Some(ref backref) = branch.backref {
619                                next == Some(self.input[backref.offset + backref.index])
620                            } else {
621                                false
622                            }
623                        }
624                        Token::Char(c) => {
625                            if self.base.case_insensitive {
626                                next.map(|c2| c & !32 == c2 & !32).unwrap_or(false)
627                            } else {
628                                next == Some(c)
629                            }
630                        }
631                        Token::OneOf { invert, ref list } => {
632                            if let Some(next) = next {
633                                (!invert || !self.base.newline || next != b'\n')
634                                    && list
635                                        .iter()
636                                        .any(|c| c.matches(next, self.base.case_insensitive))
637                                        != invert
638                            } else {
639                                false
640                            }
641                        }
642
643                        Token::Alternative
644                        | Token::End
645                        | Token::Root
646                        | Token::Start
647                        | Token::WordEnd
648                        | Token::WordStart => unreachable!(),
649                    };
650
651                if accepts {
652                    branch.increment();
653                } else {
654                    if branch.is_final() {
655                        let groups = branch.get_capturing_groups(self.max_groups, self.offset);
656
657                        let mut add = true;
658                        if let Some((new_start, new_end)) = groups[0] {
659                            if let Some(previous) = succeeded.as_ref() {
660                                if let Some((prev_start, prev_end)) = previous[0] {
661                                    if new_end - new_start <= prev_end - prev_start {
662                                        add = false;
663                                    }
664                                }
665                            }
666                        }
667                        if add {
668                            succeeded = Some(groups);
669                        }
670                    }
671                    remove += 1;
672                }
673            }
674            let end = branches.len() - remove;
675            branches.truncate(end);
676
677            if branches.is_empty() ||
678                    // The internal start thing is lazy, not greedy:
679                    (succeeded.is_some() && branches.iter().all(|t| t.node().token == Token::InternalStart))
680            {
681                return succeeded;
682            }
683
684            if next.is_some() {
685                self.offset += 1;
686                prev = next;
687            }
688        }
689    }
690}
691
692#[cfg(test)]
693mod tests {
694    #[cfg(feature = "bench")]
695    extern crate test;
696
697    #[cfg(feature = "bench")]
698    use self::test::Bencher;
699
700    use super::*;
701    use crate::PosixRegexBuilder;
702
703    // FIXME: Workaround to coerce a Box<[T; N]> into a Box<[T]>. Use type
704    // ascription when stabilized.
705    fn boxed_slice<T>(slice: Box<[T]>) -> Box<[T]> {
706        slice
707    }
708
709    macro_rules! abox {
710        ($($item:expr),*) => {
711            boxed_slice(Box::new([$($item),*]))
712        }
713    }
714
715    fn compile(regex: &str) -> PosixRegex {
716        PosixRegexBuilder::new(regex.as_bytes())
717            .with_default_classes()
718            .compile()
719            .expect("error compiling regex")
720    }
721    fn matches(regex: &str, input: &str) -> Vec<Box<[Option<(usize, usize)>]>> {
722        compile(regex).matches(input.as_bytes(), None)
723    }
724    fn matches_exact(regex: &str, input: &str) -> Option<Box<[Option<(usize, usize)>]>> {
725        compile(regex).matches_exact(input.as_bytes())
726    }
727
728    #[test]
729    fn basic() {
730        assert!(matches_exact("abc", "abc").is_some());
731        assert!(matches_exact("abc", "bbc").is_none());
732        assert!(matches_exact("abc", "acc").is_none());
733        assert!(matches_exact("abc", "abd").is_none());
734    }
735    #[test]
736    fn repetitions() {
737        assert!(matches_exact("abc*", "ab").is_some());
738        assert!(matches_exact("abc*", "abc").is_some());
739        assert!(matches_exact("abc*", "abccc").is_some());
740
741        assert!(matches_exact(r"a\{1,2\}b", "b").is_none());
742        assert!(matches_exact(r"a\{1,2\}b", "ab").is_some());
743        assert!(matches_exact(r"a\{1,2\}b", "aab").is_some());
744        assert!(matches_exact(r"a\{1,2\}b", "aaab").is_none());
745
746        assert!(matches_exact(r"[abc]\{3\}", "abcTRAILING").is_some());
747        assert!(matches_exact(r"[abc]\{3\}", "abTRAILING").is_none());
748    }
749    #[test]
750    fn any() {
751        assert!(matches_exact(".*", "").is_some());
752        assert!(matches_exact(".*b", "b").is_some());
753        assert!(matches_exact(".*b", "ab").is_some());
754        assert!(matches_exact(".*b", "aaaaab").is_some());
755        assert!(matches_exact(".*b", "HELLO WORLD").is_none());
756        assert!(matches_exact(".*b", "HELLO WORLDb").is_some());
757        assert!(matches_exact("H.*O WORLD", "HELLO WORLD").is_some());
758        assert!(matches_exact("H.*ORLD", "HELLO WORLD").is_some());
759    }
760    #[test]
761    fn brackets() {
762        assert!(matches_exact("[abc]*d", "abcd").is_some());
763        assert!(matches_exact("[0-9]*d", "1234d").is_some());
764        assert!(matches_exact("[[:digit:]]*d", "1234d").is_some());
765        assert!(matches_exact("[[:digit:]]*d", "abcd").is_none());
766    }
767    #[test]
768    fn alternations() {
769        assert!(matches_exact(r"abc\|bcd", "abc").is_some());
770        assert!(matches_exact(r"abc\|bcd", "bcd").is_some());
771        assert!(matches_exact(r"abc\|bcd", "cde").is_none());
772        assert!(matches_exact(r"[A-Z]\+\|yee", "").is_none());
773        assert!(matches_exact(r"[A-Z]\+\|yee", "HELLO").is_some());
774        assert!(matches_exact(r"[A-Z]\+\|yee", "yee").is_some());
775        assert!(matches_exact(r"[A-Z]\+\|yee", "hello").is_none());
776    }
777    #[test]
778    fn offsets() {
779        assert_eq!(matches_exact("abc", "abcd"), Some(abox![Some((0, 3))]));
780        assert_eq!(
781            matches_exact(r"[[:alpha:]]\+", "abcde12345"),
782            Some(abox![Some((0, 5))])
783        );
784        assert_eq!(
785            matches_exact(r"a\(bc\)\+d", "abcbcd"),
786            Some(abox![Some((0, 6)), Some((3, 5))])
787        );
788        assert_eq!(
789            matches_exact(r"hello\( \(world\|universe\) :D\)\?!", "hello world :D!"),
790            Some(abox![Some((0, 15)), Some((5, 14)), Some((6, 11))])
791        );
792        assert_eq!(
793            matches_exact(r"hello\( \(world\|universe\) :D\)\?", "hello world :D"),
794            Some(abox![Some((0, 14)), Some((5, 14)), Some((6, 11))])
795        );
796        assert_eq!(
797            matches_exact(r"\(\<hello\>\) world", "hello world"),
798            Some(abox![Some((0, 11)), Some((0, 5))])
799        );
800        assert_eq!(
801            matches_exact(r".*d", "hid howd ared youd"),
802            Some(abox![Some((0, 18))])
803        );
804        assert_eq!(
805            matches_exact(r".*\(a\)", "bbbbba"),
806            Some(abox![Some((0, 6)), Some((5, 6))])
807        );
808        assert_eq!(
809            matches_exact(r"\(a \(b\) \(c\)\) \(d\)", "a b c d"),
810            Some(abox![
811                Some((0, 7)),
812                Some((0, 5)),
813                Some((2, 3)),
814                Some((4, 5)),
815                Some((6, 7))
816            ])
817        );
818        assert_eq!(
819            matches_exact(r"\(.\)*", "hello"),
820            Some(abox![Some((0, 5)), Some((4, 5))])
821        );
822        assert_eq!(
823            matches(r"h\(i\)", "hello hi lol"),
824            vec![abox![Some((6, 8)), Some((7, 8))]]
825        );
826        assert_eq!(
827            matches_exact(r"\(\([[:alpha:]]\)*\)", "abcdefg"),
828            Some(abox![Some((0, 7)), Some((0, 7)), Some((6, 7))])
829        );
830        assert_eq!(
831            matches_exact(r"\(\.\([[:alpha:]]\)\)*", ".a.b.c.d.e.f.g"),
832            Some(abox![Some((0, 14)), Some((12, 14)), Some((13, 14))])
833        );
834        assert_eq!(
835            matches_exact(r"\(a\|\(b\)\)*\(c\)", "bababac"),
836            Some(abox![
837                Some((0, 7)),
838                Some((5, 6)),
839                Some((4, 5)),
840                Some((6, 7))
841            ])
842        );
843        assert_eq!(
844            matches_exact(r"\(a\|\(b\)\)*\(c\)", "aaac"),
845            Some(abox![Some((0, 4)), Some((2, 3)), None, Some((3, 4))])
846        );
847        assert_eq!(
848            matches_exact(r"a\(\)bc", "abc"),
849            Some(abox![Some((0, 3)), Some((1, 1))])
850        );
851    }
852    #[test]
853    fn matches_is_lazy() {
854        assert_eq!(
855            matches(r"\(hi\)\+", "hello hihi kek"),
856            vec![abox![Some((6, 10)), Some((8, 10))]]
857        );
858        assert_eq!(
859            matches(r"o\+", "helloooooooo woooorld, hooow are you?"),
860            vec![
861                abox![Some((4, 12))],
862                abox![Some((14, 18))],
863                abox![Some((24, 27))],
864                abox![Some((34, 35))]
865            ]
866        );
867        assert_eq!(
868            matches(r"z*", "abc"),
869            vec![
870                abox![Some((0, 0))],
871                abox![Some((1, 1))],
872                abox![Some((2, 2))],
873                abox![Some((3, 3))]
874            ]
875        );
876    }
877    #[test]
878    fn start_and_end() {
879        assert!(matches_exact("^abc$", "abc").is_some());
880        assert!(matches_exact("^bcd", "bcde").is_some());
881        assert!(matches_exact("^bcd", "abcd").is_none());
882        assert!(matches_exact("abc$", "abc").is_some());
883        assert!(matches_exact("abc$", "abcd").is_none());
884
885        assert!(matches_exact(r".*\(^\|a\)c", "c").is_some());
886        assert!(matches_exact(r".*\(^\|a\)c", "ac").is_some());
887        assert!(matches_exact(r".*\(^\|a\)c", "bc").is_none());
888
889        // Tests if ^ can be repeated without issues
890        assert!(matches_exact(".*^^a", "helloabc").is_none());
891        assert!(matches_exact(".*^^a", "abc").is_some());
892    }
893    #[test]
894    fn word_boundaries() {
895        assert!(matches_exact(r"hello\>.world", "hello world").is_some());
896        assert!(matches_exact(r"hello\>.world", "hello!world").is_some());
897        assert!(matches_exact(r"hello\>.world", "hellooworld").is_none());
898
899        assert!(matches_exact(r"hello.\<world", "hello world").is_some());
900        assert!(matches_exact(r"hello.\<world", "hello!world").is_some());
901        assert!(matches_exact(r"hello.\<world", "hellooworld").is_none());
902
903        assert!(matches_exact(r".*\<hello\>", "hihello").is_none());
904        assert!(matches_exact(r".*\<hello\>", "hi_hello").is_none());
905        assert!(matches_exact(r".*\<hello\>", "hi hello").is_some());
906    }
907    #[test]
908    fn groups() {
909        assert!(matches_exact(r"\(a*\)*", "aaaaa").is_some());
910        assert!(matches_exact(r"\(hello\) world", "hello world").is_some());
911        assert!(matches_exact(r"\(a*\|b\|c\)d", "d").is_some());
912        assert!(matches_exact(r"\(a*\|b\|c\)d", "aaaad").is_some());
913        assert!(matches_exact(r"\(a*\|b\|c\)d", "bd").is_some());
914        assert!(matches_exact(r"\(a*\|b\|c\)d", "bbbbbd").is_none());
915    }
916    #[test]
917    fn repeating_groups() {
918        assert!(matches_exact(r"\(a\|b\|c\)*d", "d").is_some());
919        assert!(matches_exact(r"\(a\|b\|c\)*d", "aaaad").is_some());
920        assert!(matches_exact(r"\(a\|b\|c\)*d", "bbbbd").is_some());
921        assert!(matches_exact(r"\(a\|b\|c\)*d", "aabbd").is_some());
922
923        assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "d").is_none());
924        assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "ad").is_some());
925        assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "abd").is_some());
926        assert!(matches_exact(r"\(a\|b\|c\)\{1,2\}d", "abcd").is_none());
927        assert!(matches_exact(r"\(\(a\|b\|c\)\)\{1,2\}d", "abd").is_some());
928        assert!(matches_exact(r"\(\(a\|b\|c\)\)\{1,2\}d", "abcd").is_none());
929        assert!(matches_exact(r"\(\(a\|b\|c\)\{1,2\}\)\{1,2\}d", "abad").is_some());
930        assert!(matches_exact(r"\(\(a\|b\|c\)\{1,2\}\)\{1,2\}d", "ababd").is_some());
931        assert!(matches_exact(r"\(\(a\|b\|c\)\{1,2\}\)\{1,2\}d", "ababad").is_none());
932
933        assert!(matches_exact(r"\(a\|b\|c\)\{4\}d", "ababad").is_none());
934        assert!(matches_exact(r"\(a\|b\|c\)\{4\}d", "ababd").is_some());
935        assert!(matches_exact(r"\(a\|b\|c\)\{4\}d", "abad").is_none());
936
937        assert!(matches_exact(r"\(\([abc]\)\)\{3\}", "abcTRAILING").is_some());
938        assert!(matches_exact(r"\(\([abc]\)\)\{3\}", "abTRAILING").is_none());
939    }
940    #[test]
941    fn backref() {
942        assert!(matches_exact(r"\([abc]\)\1d", "aad").is_some());
943        assert!(matches_exact(r"\([abc]\)\1d", "abd").is_none());
944        assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "abcabcd").is_some());
945        assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "abcbcd").is_none());
946        assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "ababd").is_some());
947        assert!(matches_exact(r"\([abc]\{2,3\}\)\1d", "abacd").is_none());
948
949        assert!(matches_exact(r"\([[:alpha:]]\).*\1d", "hellohd").is_some());
950        assert!(matches_exact(r"\([[:alpha:]]\).*\1d", "hellod").is_none());
951        assert!(matches_exact(r"\([[:alpha:]]\).*\1", "hello").is_none());
952        assert!(matches_exact(r"\([[:alpha:]]\).*\1", "helloh").is_some());
953
954        assert!(matches_exact(r"\(\)-\?\1d", "d").is_some());
955        assert!(matches_exact(r"\(\)-\?\1", "").is_some());
956
957        // Just make sure this doesn't crash it (even though it should error
958        // but I'm too lazy)
959        assert!(matches_exact(r"\(\1\)", "a").is_none());
960
961        assert!(matches_exact(r"\(h.\)\1\+!", "hihihi!").is_some());
962        assert!(matches_exact(r"\(h.\)\1\+!", "hehehe!").is_some());
963        assert!(matches_exact(r"\(h.\)\1\+!", "hahehe!").is_none());
964
965        assert!(matches_exact(
966            r"\(hello \(\<.*\>\) \)*how are you \2",
967            "hello world how are you world"
968        )
969        .is_some());
970        assert!(matches_exact(
971            r"\(hello \(\<.*\>\) \)*how are you \2",
972            "hello universe hello world how are you world"
973        )
974        .is_some());
975        assert!(matches_exact(
976            r"\(hello \(\<.*\>\) \)*how are you \2",
977            "hello world hello universe how are you world"
978        )
979        .is_none());
980    }
981    #[test]
982    fn case_insensitive() {
983        assert!(compile(r"abc[de]")
984            .case_insensitive(true)
985            .matches_exact(b"ABCD")
986            .is_some());
987        assert!(compile(r"abc[de]")
988            .case_insensitive(true)
989            .matches_exact(b"ABCF")
990            .is_none());
991    }
992    #[test]
993    fn newline() {
994        assert_eq!(
995            compile(r"^hello$")
996                .newline(true)
997                .matches(b"hi\nhello\ngreetings", None)
998                .len(),
999            1
1000        );
1001        assert!(compile(r"^hello$")
1002            .newline(true)
1003            .matches(b"hi\ngood day\ngreetings", None)
1004            .is_empty());
1005    }
1006    #[test]
1007    fn no_start_end() {
1008        assert!(compile(r"^hello")
1009            .no_start(true)
1010            .matches_exact(b"hello")
1011            .is_none());
1012        assert!(compile(r"hello$")
1013            .no_end(true)
1014            .matches_exact(b"hello")
1015            .is_none());
1016    }
1017
1018    #[cfg(feature = "bench")]
1019    #[bench]
1020    fn speed_matches_exact(b: &mut Bencher) {
1021        b.iter(|| {
1022            assert!(matches_exact(r"\(\(a*\|b\|c\) test\|yee\)", "aaaaa test").is_some());
1023        })
1024    }
1025    #[cfg(feature = "bench")]
1026    #[bench]
1027    fn speed_matches(b: &mut Bencher) {
1028        b.iter(|| {
1029            assert_eq!(
1030                matches(r"\(\(a*\|b\|c\) test\|yee\)", "oooo aaaaa test").len(),
1031                1
1032            );
1033        })
1034    }
1035}