ad_editor/regex/
matches.rs

1use crate::regex::{Haystack, Regex, vm::N_SLOTS};
2use std::{borrow::Cow, sync::Arc};
3
4/// The match location of a Regex against a given input.
5///
6/// The sub-match indices are relative to the input used to run the original match.
7#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
8pub struct Match {
9    pub(super) n_submatches: usize,
10    pub(super) sub_matches: [usize; N_SLOTS],
11    pub(super) submatch_names: Arc<[String]>,
12}
13
14impl Match {
15    pub(crate) fn synthetic(from: usize, to: usize) -> Self {
16        let mut sub_matches = [0; N_SLOTS];
17        sub_matches[0] = from;
18        sub_matches[1] = to;
19
20        Self {
21            n_submatches: 0,
22            sub_matches,
23            submatch_names: Arc::new([]),
24        }
25    }
26
27    /// Extract this match from the given haystack
28    pub fn match_text<'a, H>(&self, haystack: &'a H) -> Cow<'a, str>
29    where
30        H: Haystack,
31    {
32        let (from, to) = self.loc();
33
34        haystack.substr(from, to)
35    }
36
37    /// Extract the given submatch by index if it exists
38    pub fn submatch_text<'a, H>(&self, n: usize, haystack: &'a H) -> Option<Cow<'a, str>>
39    where
40        H: Haystack,
41    {
42        let (from, to) = self.sub_loc(n)?;
43
44        Some(haystack.substr(from, to))
45    }
46
47    /// Extract the given submatch by name if it exists
48    pub fn submatch_text_by_name<'a, H>(&self, name: &str, haystack: &'a H) -> Option<Cow<'a, str>>
49    where
50        H: Haystack,
51    {
52        let (from, to) = self.sub_loc_by_name(name)?;
53
54        Some(haystack.substr(from, to))
55    }
56
57    /// The names of each submatch
58    pub fn named_matches(&self) -> Vec<&str> {
59        let mut matches = Vec::new();
60        for name in self.submatch_names.iter() {
61            if self.sub_loc_by_name(name).is_some() {
62                matches.push(name.as_str());
63            }
64        }
65
66        matches
67    }
68
69    /// The start and end of this match in terms of byte offsets
70    pub fn loc(&self) -> (usize, usize) {
71        let (start, end) = (self.sub_matches[0], self.sub_matches[1]);
72
73        assert!(
74            start <= end,
75            "invalid match: {start} > {end}: {:?}",
76            self.sub_matches
77        );
78
79        (start, end)
80    }
81
82    fn sub_loc_by_name(&self, name: &str) -> Option<(usize, usize)> {
83        let n = self.submatch_names.iter().position(|s| s == name)?;
84        self.sub_loc(n + 1)
85    }
86
87    pub(crate) fn sub_loc(&self, n: usize) -> Option<(usize, usize)> {
88        if 2 * n + 1 >= N_SLOTS {
89            return None;
90        }
91        let (start, end) = (self.sub_matches[2 * n], self.sub_matches[2 * n + 1]);
92        if n > 0 && start == 0 && end == 0 {
93            return None;
94        }
95
96        assert!(
97            start <= end,
98            "invalid match: {start} > {end}: {:?}",
99            self.sub_matches
100        );
101
102        Some((start, end))
103    }
104
105    pub fn iter_locs(&self) -> impl Iterator<Item = Option<(usize, usize)>> {
106        let mut n = 0;
107
108        std::iter::from_fn(move || {
109            if n > self.n_submatches {
110                None
111            } else {
112                let loc = self.sub_loc(n);
113                n += 1;
114
115                Some(loc)
116            }
117        })
118    }
119}
120
121/// An iterator over sequential, non overlapping matches of a Regex
122/// against a given input
123#[derive(Debug)]
124pub struct MatchIter<'a, H>
125where
126    H: Haystack,
127{
128    pub(super) haystack: &'a H,
129    pub(super) r: &'a mut Regex,
130    pub(super) from: usize,
131}
132
133impl<'a, H> Iterator for MatchIter<'a, H>
134where
135    H: Haystack,
136{
137    type Item = Match;
138
139    fn next(&mut self) -> Option<Self::Item> {
140        let m = self.r.find_from(self.haystack, self.from)?;
141        let (_, from) = m.loc();
142        if from == self.from {
143            self.from += self
144                .haystack
145                .substr_from(from)
146                .unwrap()
147                .chars()
148                .next()
149                .unwrap()
150                .len_utf8();
151        } else {
152            self.from = from;
153        }
154
155        Some(m)
156    }
157}