maybe_regex/
lib.rs

1use crate::utils::{remove_first_n_chars, remove_last_n_chars};
2use lazy_static::lazy_static;
3use log::error;
4use regex::{Captures, Regex, RegexBuilder, Replacer};
5use std::{cmp::Ordering, fmt::Display};
6
7mod utils;
8
9lazy_static! {
10    // Simplistic check to see if a string is likely a regex.
11    // TODO: is there a way to make this actually correct?
12    static ref REGEX_REGEX: Regex = Regex::new(r"[\\b\$\^\[\]\+\*\.]").unwrap();
13}
14
/// A search term that is matched either as plain text or as a compiled regular expression.
#[derive(Debug, Clone)]
pub struct MaybeRegex {
    /// The matcher built from `original`: a raw needle or a compiled regex.
    data: TagWrapperData,
    /// The pattern text given to the constructor, minus any leading/trailing `-` marker.
    original: String,
    /// Set when the input carried a `-` marker; `matches` inverts its result when true.
    pub is_negative: bool,
    /// Starts out `false`; switched on by `as_case_sensitive`.
    case_sensitive: bool,
}
22
23impl PartialEq for MaybeRegex {
24    fn eq(&self, other: &Self) -> bool {
25        self.original == other.original && self.is_negative == other.is_negative
26    }
27}
28
29impl PartialOrd for MaybeRegex {
30    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
31        (&self.original, self.is_negative).partial_cmp(&(&other.original, other.is_negative))
32    }
33}
34
/// The two kinds of matcher a `MaybeRegex` can hold.
#[derive(Debug, Clone)]
pub enum TagWrapperData {
    /// Literal text that is searched for as a substring.
    Raw(String),
    /// A compiled regular expression.
    Regex(Regex),
}
40
41impl MaybeRegex {
42    pub fn new<S: AsRef<str>>(s: S) -> Self {
43        Self::from(s)
44    }
45
46    pub fn from<S: AsRef<str>>(s: S) -> Self {
47        let s = s.as_ref();
48        let (s, is_negative) = if s.starts_with("-") {
49            (remove_first_n_chars(s, 1), true)
50        } else if s.ends_with("-") {
51            (remove_last_n_chars(s, 1), true)
52        } else {
53            (s.into(), false)
54        };
55
56        match get_regex(&s) {
57            Some(regex) => Self {
58                data: TagWrapperData::Regex(regex),
59                original: s,
60                is_negative,
61                case_sensitive: false,
62            },
63            None => Self {
64                data: TagWrapperData::Raw(s.clone()),
65                original: s,
66                is_negative,
67                case_sensitive: false,
68            },
69        }
70    }
71
72    pub fn as_case_sensitive(mut self) -> Self {
73        self.case_sensitive = true;
74        self
75    }
76
77    pub fn is_regex(&self) -> bool {
78        match &self.data {
79            TagWrapperData::Raw(_) => false,
80            TagWrapperData::Regex(_) => true,
81        }
82    }
83
84    pub fn matches<S: AsRef<str>>(&self, haystack: S) -> bool {
85        let matches = self.is_contained_within(haystack);
86        if self.is_negative {
87            return !matches;
88        }
89        matches
90    }
91
92    // You likely want matches, which considers whether the input is "negative" or not.
93    // This ignores that and just returns whether the needle is found inside the haystack.
94    pub fn is_contained_within<S: AsRef<str>>(&self, haystack: S) -> bool {
95        let haystack = if self.case_sensitive {
96            haystack.as_ref()
97        } else {
98            &haystack.as_ref().to_lowercase()
99        };
100
101        match &self.data {
102            TagWrapperData::Raw(value) => haystack.contains(value),
103            TagWrapperData::Regex(regex) => regex.is_match(haystack),
104        }
105    }
106
107    pub fn replace(&self, str: String, to_string: impl Fn(&str) -> String + 'static) -> String {
108        let mut output = str;
109        match &self.data {
110            TagWrapperData::Raw(value) => {
111                let replacement = to_string(value);
112                output = output.replace(value, &replacement);
113            }
114            TagWrapperData::Regex(regex) => {
115                let highlighter = Highlighter {
116                    to_string_cb: Box::new(to_string),
117                };
118
119                // TODO: Silly hack since replace_all doesn't seem to span multiple lines
120                output = output.replace("\n", "abcdefg");
121                output = regex.replace_all(&output, highlighter).to_string();
122                output = output.replace("abcdefg", "\n");
123            }
124        };
125        output
126    }
127
128    pub fn to_str(&self) -> &str {
129        self.original.as_str()
130    }
131
132    pub fn match_indices<S: AsRef<str>>(&self, other: S) -> Vec<(usize, usize)> {
133        let other = if self.case_sensitive {
134            other.as_ref()
135        } else {
136            &other.as_ref().to_lowercase()
137        };
138
139        match &self.data {
140            TagWrapperData::Raw(value) => other
141                .match_indices(value)
142                .map(|(index, _)| (index, value.len()))
143                .collect(),
144            TagWrapperData::Regex(regex) => regex
145                .find_iter(other)
146                .map(|some_match| (some_match.start(), some_match.len()))
147                .collect(),
148        }
149    }
150
151    pub fn matches_exactly<S: AsRef<str>>(&self, other: S) -> bool {
152        let other = if self.case_sensitive {
153            other.as_ref()
154        } else {
155            &other.as_ref().to_lowercase()
156        };
157
158        match &self.data {
159            TagWrapperData::Raw(value) => other == *value,
160            TagWrapperData::Regex(regex) => {
161                if let Some(found) = regex.find(other) {
162                    return found.len() == other.len();
163                }
164                false
165            }
166        }
167    }
168
169    pub fn starts_with<S: AsRef<str>>(&self, s: S) -> bool {
170        let s = if self.case_sensitive {
171            s.as_ref()
172        } else {
173            &s.as_ref().to_lowercase()
174        };
175
176        match &self.data {
177            TagWrapperData::Raw(value) => value.starts_with(s),
178            TagWrapperData::Regex(regex) => {
179                if let Some(found) = regex.find(s) {
180                    return found.start() == 0;
181                }
182                false
183            }
184        }
185    }
186}
187
188impl Display for MaybeRegex {
189    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
190        write!(f, "{}", self.to_str())
191    }
192}
193
194fn get_regex(s: &str) -> Option<Regex> {
195    if REGEX_REGEX.is_match(s) {
196        match RegexBuilder::new(s).case_insensitive(true).build() {
197            Ok(regex) => {
198                return Some(regex);
199            }
200            Err(_e) => {
201                error!("Bad regex: {s}");
202            }
203        }
204    }
205    None
206}
207
/// `Replacer` adapter that passes each regex match through a caller-supplied callback.
struct Highlighter {
    /// Receives the matched text and returns the text to write in its place.
    to_string_cb: Box<dyn Fn(&str) -> String>,
}
211
212impl Replacer for Highlighter {
213    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
214        let temp = caps.get(0).map_or("", |m| m.as_str()).to_string();
215        let rv = (*self.to_string_cb)(&temp);
216        dst.push_str(&rv);
217    }
218}
219
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn detects_regexes() {
        assert!(MaybeRegex::new("This is a regex.*").is_regex());
        assert!(MaybeRegex::new(".*This is a regex").is_regex());
        assert!(MaybeRegex::new(".This is a regex").is_regex());
        assert!(MaybeRegex::new("This is a regex [0-9]").is_regex());
    }

    #[test]
    fn detects_non_regexes() {
        assert!(!MaybeRegex::new("This is not a regex").is_regex());
        assert!(!MaybeRegex::new("This is not a regex?").is_regex());
        assert!(!MaybeRegex::new("This is not a regex [").is_regex());
        assert!(!MaybeRegex::new("This is not a regex [0-9").is_regex());
    }

    #[test]
    fn contains_works() {
        assert!(!MaybeRegex::new("z").is_contained_within("Hello"));
        assert!(!MaybeRegex::new("e$").is_contained_within("Hello"));

        assert!(MaybeRegex::new("e").is_contained_within("Hello"));
        assert!(MaybeRegex::new("o$").is_contained_within("Hello"));
    }

    #[test]
    fn negative_works() {
        assert!(MaybeRegex::new("-e").is_contained_within("Hello"));
        assert!(!MaybeRegex::new("-e").matches("Hello"));

        assert!(MaybeRegex::new("-o$").is_contained_within("Hello"));
        assert!(!MaybeRegex::new("-o$").matches("Hello"));
    }

    #[test]
    fn all_string_types_work() {
        assert!(MaybeRegex::new("e").is_contained_within("Hello"));
        assert!(MaybeRegex::new(String::from("e")).is_contained_within("Hello"));
        assert!(MaybeRegex::new(&String::from("e")).is_contained_within("Hello"));
    }

    #[test]
    fn is_case_insensitive() {
        // Upper-case pattern against a lower-case haystack.
        assert!(MaybeRegex::new(".*O").is_contained_within("hello"));
        // Lower-case patterns against an upper-case haystack, raw and regex, so the
        // haystack's case folding is actually exercised.
        assert!(MaybeRegex::new("e").is_contained_within("HELLO"));
        assert!(MaybeRegex::new("o$").is_contained_within("HELLO"));
    }

    #[test]
    fn implements_to_string() {
        assert_eq!(MaybeRegex::new("howdy").to_string(), "howdy");
    }
}