sayit/
pass.rs

1use std::{borrow::Cow, error::Error, fmt};
2
3use regex_automata::{
4    meta::{BuildError, Regex},
5    util::syntax,
6};
7
8use crate::{tag::Tag, Match};
9
10/// A group of rules with their regexes combined into one
11#[derive(Clone)]
12pub struct Pass {
13    regexes: Vec<String>,
14    tags: Vec<Box<dyn Tag>>,
15    multi_regex: Regex,
16}
17
18// skips 20 pages of debug output of `multi_regex` field
19#[allow(clippy::missing_fields_in_debug)]
20impl fmt::Debug for Pass {
21    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
22        f.debug_struct("Pass")
23            .field("patterns", &self.regexes)
24            .field("tags", &self.tags)
25            .finish()
26    }
27}
28
29impl Pass {
30    /// Creates new instance from vec of regex and tag pairs
31    #[allow(clippy::result_large_err)]
32    pub fn new(rules: Vec<(String, Box<dyn Tag>)>) -> Result<Self, CreationError> {
33        let (patterns, tags): (Vec<_>, Vec<_>) = rules.into_iter().unzip();
34
35        let multi_regex = Regex::builder()
36            .syntax(
37                syntax::Config::new()
38                    .multi_line(true)
39                    .case_insensitive(true),
40            )
41            .build_many(&patterns)
42            .map_err(CreationError::BadRegex)?;
43
44        Ok(Self {
45            regexes: patterns,
46            multi_regex,
47            tags,
48        })
49    }
50
51    /// Merges it's own regexes with other. Tags for existing regexes are replaced while new ones
52    /// are placed at the end of resulting new Pass
53    #[allow(clippy::result_large_err)]
54    pub fn extend(&self, other: Pass) -> Result<Self, CreationError> {
55        let mut existing_rules: Vec<_> = self
56            .regexes
57            .iter()
58            .cloned()
59            .zip(self.tags.clone())
60            .collect();
61
62        let mut appended_rules = Vec::new();
63
64        'outer: for (new_regex, new_tag) in other.regexes.into_iter().zip(other.tags.into_iter()) {
65            for (existing_regex, existing_tag) in &mut existing_rules {
66                if new_regex == **existing_regex {
67                    *existing_tag = new_tag;
68                    continue 'outer;
69                }
70            }
71
72            appended_rules.push((new_regex, new_tag));
73        }
74
75        existing_rules.extend(appended_rules);
76
77        Self::new(existing_rules)
78    }
79
80    /// Produces string with all non-overlapping regexes replaced by corresponding tags
81    #[must_use]
82    pub fn apply<'a>(&self, text: &'a str) -> Cow<'a, str> {
83        let all_captures: Vec<_> = self.multi_regex.captures_iter(text).collect();
84
85        if all_captures.is_empty() {
86            return Cow::Borrowed(text);
87        }
88
89        let mut last_replacement = 0;
90        let mut output = String::with_capacity(text.len());
91
92        for caps in all_captures {
93            // SAFETY: these captures come from matches. The only way this can fail is if they were
94            //         created manually with Captures::empty()
95            let caps_match = unsafe { caps.get_match().unwrap_unchecked() };
96
97            let range = caps_match.range();
98            let tag = &self.tags[caps_match.pattern()];
99
100            let repl = tag.generate(&Match {
101                captures: caps,
102                input: text,
103            });
104
105            output.push_str(&text[last_replacement..range.start]);
106            output.push_str(&repl);
107
108            last_replacement = range.end;
109        }
110
111        output.push_str(&text[last_replacement..]);
112
113        Cow::Owned(output)
114    }
115}
116
117#[derive(Debug)]
118pub enum CreationError {
119    BadRegex(BuildError),
120}
121
122impl fmt::Display for CreationError {
123    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124        match self {
125            CreationError::BadRegex(err) => {
126                let mut msg = err.to_string();
127                if let Some(syntax_msg) = err.syntax_error() {
128                    msg = format!("msg: {syntax_msg}");
129                }
130
131                write!(f, "regex combination failed: {msg}")
132            }
133        }
134    }
135}
136
137impl Error for CreationError {}
138
#[cfg(test)]
mod tests {
    use crate::tag_impls::Literal;

    use super::Pass;

    // Test-only equality: two passes are equal when their rules are; the compiled regex is
    // not compared.
    impl PartialEq for Pass {
        fn eq(&self, other: &Self) -> bool {
            self.regexes == other.regexes && self.tags == other.tags
        }
    }

    /// Extending with a rule whose regex already exists swaps the tag and keeps the position.
    #[test]
    fn rules_replaced() {
        let base = Pass::new(vec![
            ("old".to_string(), Literal::new_boxed("old")),
            ("old2".to_string(), Literal::new_boxed("old2")),
        ])
        .unwrap();
        let overrides = Pass::new(vec![("old".to_string(), Literal::new_boxed("new"))]).unwrap();

        let wanted = Pass::new(vec![
            ("old".to_string(), Literal::new_boxed("new")),
            ("old2".to_string(), Literal::new_boxed("old2")),
        ])
        .unwrap();

        let merged = base.extend(overrides).unwrap();

        assert_eq!(merged, wanted);
    }

    /// Extending with a rule whose regex is unknown places it after the existing rules.
    #[test]
    fn rules_appended() {
        let base = Pass::new(vec![("existing".to_string(), Literal::new_boxed("old"))]).unwrap();
        let additions = Pass::new(vec![("added".to_string(), Literal::new_boxed("new"))]).unwrap();

        let wanted = Pass::new(vec![
            ("existing".to_string(), Literal::new_boxed("old")),
            ("added".to_string(), Literal::new_boxed("new")),
        ])
        .unwrap();

        let merged = base.extend(additions).unwrap();

        assert_eq!(merged, wanted);
    }
}