vec_reg_common/
lib.rs

1mod regex;
2
3use std::{collections::HashMap, ops::Range, rc::Rc};
4
5pub use regex::Regex;
6
7#[cfg(not(target_env = "msvc"))]
8use tikv_jemallocator::Jemalloc;
9
// Registers jemalloc (from `tikv_jemallocator`) as the global allocator.
// The declaration is compiled out when the target environment is MSVC.
// NOTE(review): a `#[global_allocator]` applies to the whole program, not just
// this crate; if this crate is consumed as a library, confirm that imposing the
// allocator on downstream binaries is intended.
#[cfg(not(target_env = "msvc"))]
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;
13
/// A single match of a regex inside an input slice.
///
/// A `Match` borrows the searched slice (the haystack) and records the
/// half-open offset range `start..end` that matched.
///
/// `'t` is the lifetime of the searched slice.
///
/// `Match` is `Copy` for every element type `I`, because it only holds a
/// shared reference and two offsets.
#[derive(Debug)]
pub struct Match<'t, I> {
    /// The slice that was searched.
    input: &'t [I],
    /// Offset of the first matched element.
    start: usize,
    /// Offset one past the last matched element.
    end: usize,
}

// `Clone`/`Copy` are written by hand on purpose: `#[derive(Clone, Copy)]` would
// add `I: Clone` / `I: Copy` bounds, although no `I` value is ever duplicated.
impl<'t, I> Clone for Match<'t, I> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<'t, I> Copy for Match<'t, I> {}

impl<'t, I> Match<'t, I> {
    /// Returns the starting offset of the match in the haystack.
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the ending offset of the match in the haystack.
    pub fn end(&self) -> usize {
        self.end
    }

    /// Returns the range over the starting and ending offsets of the match in the haystack.
    pub fn range(&self) -> Range<usize> {
        self.start..self.end
    }

    /// Returns the matched sub-slice.
    ///
    /// # Panics
    ///
    /// Panics if the stored offsets do not form a valid range inside the
    /// haystack (`start > end` or `end` past the end of the slice).
    pub fn values(&self) -> &'t [I] {
        &self.input[self.range()]
    }
}
42
/// Offsets of one capture group within the searched slice.
///
/// `Captures::get` copies these two values into the `start` and `end` of the
/// `Match` it returns, so they describe the half-open range `start..end`.
#[derive(Debug)]
struct CaptureLocation {
    /// Offset of the first captured element.
    pub start: usize,
    /// Offset one past the last captured element.
    pub end: usize,
}
48
49/// Captures represents a group of captured sub-slice for a single match.
50///
51/// The 0th capture always corresponds to the entire match. Each subsequent index corresponds to the next capture group in the regex.
52///  If a capture group is named, then the matched sub-slice is also available via the name method.
53///  (Note that the 0th capture is always unnamed and so must be accessed with the get method.)
54///
55/// `'t` is the lifetime of the matched slice.
56#[derive(Debug)]
57pub struct Captures<'t, I> {
58    input: &'t [I],
59    capture_locations: Vec<CaptureLocation>,
60    named_capture_index: Rc<HashMap<String, usize>>,
61}
62
63impl<'t, I> Captures<'t, I> {
64    /// Returns the match associated with the capture group at index i.
65    /// If i does not correspond to a capture group, or if the capture group did not participate in the match, then None is returned.
66    pub fn get(&self, index: usize) -> Option<Match<'t, I>> {
67        self.capture_locations.get(index).map(|location| Match {
68            input: self.input,
69            start: location.start,
70            end: location.end,
71        })
72    }
73
74    /// Returns the match for the capture group named name. If name isn’t a valid capture group or didn’t match anything, then None is returned.
75    pub fn name(&self, name: &str) -> Option<Match<'t, I>> {
76        self.named_capture_index
77            .get(name)
78            .and_then(|idx| self.get(*idx))
79    }
80
81    /// Returns the total number of capture groups (even if they didn’t match).
82    /// This is always at least 1, since every regex has at least one capture group that corresponds to the full match.
83    #[allow(clippy::len_without_is_empty)]
84    pub fn len(&self) -> usize {
85        self.capture_locations.len()
86    }
87}
88
89pub trait CompiledRegex<I> {
90    /// Returns true if and only if there is a match for the regex in the slice given.
91    fn is_match(&self, input: &[I]) -> bool;
92
93    /// Returns true if and only if match the entire input slice.
94    fn is_full_match(&self, input: &[I]) -> bool {
95        if let Some(found_match) = self.find(input) {
96            found_match.start() == 0 && found_match.end() == input.len()
97        } else {
98            false
99        }
100    }
101
102    /// Returns the start and end range of the leftmost-first match in slice. If no match exists, then None is returned.
103    fn find<'t>(&self, input: &'t [I]) -> Option<Match<'t, I>>;
104
105    /// Returns the capture groups corresponding to the leftmost-first match in text.
106    /// Capture group 0 always corresponds to the entire match. If no match is found, then None is returned.
107    fn captures<'t>(&self, input: &'t [I]) -> Option<Captures<'t, I>>;
108}