vec_reg_common/lib.rs
1mod regex;
2
3use std::{collections::HashMap, ops::Range, rc::Rc};
4
5pub use regex::Regex;
6
7#[cfg(not(target_env = "msvc"))]
8use tikv_jemallocator::Jemalloc;
9
// Installs jemalloc (`tikv_jemallocator::Jemalloc`) as the global allocator on
// every target whose `target_env` is not "msvc".
// NOTE(review): this file appears to be a library crate root; a
// `#[global_allocator]` declared here applies to every binary that links the
// crate and prevents such a binary from declaring its own — confirm intended.
#[cfg(not(target_env = "msvc"))]
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;
13
/// A single match inside a haystack slice.
///
/// A `Match` is a lightweight view: it stores a shared reference to the
/// haystack together with the half-open offset range `start..end` that
/// matched. It is `Copy` for every element type `I`, because copying it only
/// duplicates the reference and the two offsets, never the elements.
///
/// `'t` is the lifetime of the haystack slice.
#[derive(Debug)]
pub struct Match<'t, I> {
    input: &'t [I],
    start: usize,
    end: usize,
}

// `Clone`/`Copy` are written by hand on purpose: `#[derive(Clone, Copy)]`
// would add `I: Clone` / `I: Copy` bounds even though no `I` is ever copied.
impl<I> Clone for Match<'_, I> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<I> Copy for Match<'_, I> {}

impl<'t, I> Match<'t, I> {
    /// Returns the starting offset of the match in the haystack.
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the ending offset of the match in the haystack.
    ///
    /// The offset is exclusive: it is the upper bound of [`Match::range`].
    pub fn end(&self) -> usize {
        self.end
    }

    /// Returns the half-open range `start..end` covered by the match.
    pub fn range(&self) -> Range<usize> {
        self.start..self.end
    }

    /// Returns the matched sub-slice, borrowed for the haystack lifetime `'t`.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid range of the haystack; whoever
    /// constructs the `Match` is expected to guarantee that it is.
    pub fn values(&self) -> &'t [I] {
        &self.input[self.range()]
    }
}
42
/// Offsets of one capture group within the haystack.
///
/// `Captures::get` copies these two values verbatim into the `start` and
/// `end` of the `Match` it returns.
#[derive(Debug)]
struct CaptureLocation {
    /// Offset that becomes `Match::start` for this group.
    pub start: usize,
    /// Offset that becomes `Match::end` for this group.
    pub end: usize,
}
48
49/// Captures represents a group of captured sub-slice for a single match.
50///
51/// The 0th capture always corresponds to the entire match. Each subsequent index corresponds to the next capture group in the regex.
52/// If a capture group is named, then the matched sub-slice is also available via the name method.
53/// (Note that the 0th capture is always unnamed and so must be accessed with the get method.)
54///
55/// `'t` is the lifetime of the matched slice.
56#[derive(Debug)]
57pub struct Captures<'t, I> {
58 input: &'t [I],
59 capture_locations: Vec<CaptureLocation>,
60 named_capture_index: Rc<HashMap<String, usize>>,
61}
62
63impl<'t, I> Captures<'t, I> {
64 /// Returns the match associated with the capture group at index i.
65 /// If i does not correspond to a capture group, or if the capture group did not participate in the match, then None is returned.
66 pub fn get(&self, index: usize) -> Option<Match<'t, I>> {
67 self.capture_locations.get(index).map(|location| Match {
68 input: self.input,
69 start: location.start,
70 end: location.end,
71 })
72 }
73
74 /// Returns the match for the capture group named name. If name isn’t a valid capture group or didn’t match anything, then None is returned.
75 pub fn name(&self, name: &str) -> Option<Match<'t, I>> {
76 self.named_capture_index
77 .get(name)
78 .and_then(|idx| self.get(*idx))
79 }
80
81 /// Returns the total number of capture groups (even if they didn’t match).
82 /// This is always at least 1, since every regex has at least one capture group that corresponds to the full match.
83 #[allow(clippy::len_without_is_empty)]
84 pub fn len(&self) -> usize {
85 self.capture_locations.len()
86 }
87}
88
89pub trait CompiledRegex<I> {
90 /// Returns true if and only if there is a match for the regex in the slice given.
91 fn is_match(&self, input: &[I]) -> bool;
92
93 /// Returns true if and only if match the entire input slice.
94 fn is_full_match(&self, input: &[I]) -> bool {
95 if let Some(found_match) = self.find(input) {
96 found_match.start() == 0 && found_match.end() == input.len()
97 } else {
98 false
99 }
100 }
101
102 /// Returns the start and end range of the leftmost-first match in slice. If no match exists, then None is returned.
103 fn find<'t>(&self, input: &'t [I]) -> Option<Match<'t, I>>;
104
105 /// Returns the capture groups corresponding to the leftmost-first match in text.
106 /// Capture group 0 always corresponds to the entire match. If no match is found, then None is returned.
107 fn captures<'t>(&self, input: &'t [I]) -> Option<Captures<'t, I>>;
108}