Skip to main content

plf_contrib/
regex.rs

1use std::collections::HashMap;
2use std::sync::{LazyLock, RwLock};
3
4use regex::Regex;
5use plf::{Filter, Kwargs, State, TeraResult, Test};
6
7static STRIPTAGS_RE: LazyLock<Regex> =
8    LazyLock::new(|| Regex::new(r"(<!--.*?-->|<[^>]*>)").unwrap());
9
10static SPACELESS_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r">\s+<").unwrap());
11
12/// Tries to remove HTML tags from input. Does not guarantee well-formed output if input is not valid HTML.
13///
14/// If value is "<b>Joel</b>", the output will be "Joel".
15/// Note that if the template you are using it in is automatically escaped, you will need to call the safe filter after striptags.
16///
17/// ```text
18/// {{ value | striptags }}
19/// ```
20pub fn striptags(val: &str, _: Kwargs, _: &State) -> String {
21    STRIPTAGS_RE.replace_all(val, "").into_owned()
22}
23
24/// Remove space ( ) and line breaks (\n or \r\n) between HTML tags.
25///
26/// If the value is "<p>\n<a> </a>\r\n </p>", the output will be "<p><a></a></p>".
27/// Note that only whitespace between successive opening tags and successive closing tags is removed.
28/// Also note that if the template you are using it in is automatically escaped, you will need to call the safe filter after spaceless.
29///
30/// ```text
31/// {{ value | spaceless }}
32/// ```
33pub fn spaceless(val: &str, _: Kwargs, _: &State) -> String {
34    SPACELESS_RE.replace_all(val, "><").into_owned()
35}
36
37fn get_or_create_regex(cache: &RwLock<HashMap<String, Regex>>, pattern: &str) -> TeraResult<Regex> {
38    if let Some(r) = cache.read().unwrap().get(pattern) {
39        return Ok(r.clone());
40    }
41
42    let mut cache = cache.write().unwrap();
43
44    let regex = match Regex::new(pattern) {
45        Ok(regex) => regex,
46        Err(e) => return Err(tera::Error::message(format!("Invalid regex: {e}"))),
47    };
48
49    cache.insert(String::from(pattern), regex.clone());
50    Ok(regex)
51}
52
53/// Returns true if the given variable is a string and matches the regex in the `pat` argument.
54/// The regex will only be compiled once.
55///
56/// ```text
57/// {% if value is matching(pat="^hello") %}...{% endif %}
58/// ```
59#[derive(Debug, Default)]
60pub struct Matching {
61    cache: RwLock<HashMap<String, Regex>>,
62}
63
64impl Test<&str, TeraResult<bool>> for Matching {
65    fn call(&self, val: &str, kwargs: Kwargs, _: &State) -> TeraResult<bool> {
66        let pat = kwargs.must_get::<&str>("pat")?;
67        let regex = get_or_create_regex(&self.cache, pat)?;
68        Ok(regex.is_match(val))
69    }
70}
71
72/// Takes 2 mandatory string named arguments: `pattern` (regex pattern) and `rep`.
73/// This will replace all occurrences of `pattern` with `rep`.
74/// The regex will only be compiled once.
75///
76/// ```text
77/// {{ value | regex_replace(pattern="\d+", rep="") }}
78/// ```
79#[derive(Debug, Default)]
80pub struct RegexReplace {
81    cache: RwLock<HashMap<String, Regex>>,
82}
83
84impl Filter<&str, TeraResult<String>> for RegexReplace {
85    fn call(&self, val: &str, kwargs: Kwargs, _: &State) -> TeraResult<String> {
86        let pattern = kwargs.must_get::<&str>("pattern")?;
87        let rep = kwargs.must_get::<&str>("rep")?;
88        let regex = get_or_create_regex(&self.cache, pattern)?;
89        Ok(regex.replace_all(val, rep).into_owned())
90    }
91}
92
#[cfg(test)]
mod tests {
    use super::*;
    use plf::value::Map;
    use plf::Context;
    use std::sync::Arc;

    /// Builds keyword arguments from literal `(name, value)` string pairs.
    fn kwargs_of(pairs: &[(&'static str, &'static str)]) -> Kwargs {
        let mut map = Map::new();
        for &(name, value) in pairs {
            map.insert(name.into(), value.into());
        }
        Kwargs::new(Arc::new(map))
    }

    #[test]
    fn test_striptags() {
        // (input, expected output)
        let cases = [
            (
                r"<b>Joel</b> <button>is</button> a <span>slug</span>",
                "Joel is a slug",
            ),
            (
                r#"<p>just a small   \n <a href="x"> example</a> link</p>\n<p>to a webpage</p><!-- <p>and some commented stuff</p> -->"#,
                r#"just a small   \n  example link\nto a webpage"#,
            ),
            (
                r"<p>See: &#39;&eacute; is an apostrophe followed by e acute</p>",
                r"See: &#39;&eacute; is an apostrophe followed by e acute",
            ),
            (r"<adf>a", "a"),
            (r"</adf>a", "a"),
            (r"<asdf><asdf>e", "e"),
            // An opening `<` without a closing `>` is not a tag and must survive.
            (r"hi, <f x", "hi, <f x"),
            ("234<235, right?", "234<235, right?"),
            ("a4<a5 right?", "a4<a5 right?"),
            ("b7>b2!", "b7>b2!"),
            ("</fe", "</fe"),
            ("<x>b<y>", "b"),
            (r#"a<p a >b</p>c"#, "abc"),
            (r#"d<a:b c:d>e</p>f"#, "def"),
            (
                r#"<strong>foo</strong><a href="http://example.com">bar</a>"#,
                "foobar",
            ),
        ];

        let ctx = Context::new();
        let state = State::new(&ctx);
        for (input, expected) in cases {
            let actual = striptags(input, Kwargs::default(), &state);
            assert_eq!(expected, actual);
        }
    }

    #[test]
    fn test_spaceless() {
        // (input, expected output)
        let cases = [
            ("<p>\n<a>test</a>\r\n </p>", "<p><a>test</a></p>"),
            ("<p>\n<a> </a>\r\n </p>", "<p><a></a></p>"),
            ("<p> </p>", "<p></p>"),
            ("<p> <a>", "<p><a>"),
            // Whitespace next to text content is preserved.
            ("<p> test</p>", "<p> test</p>"),
            ("<p>\r\n</p>", "<p></p>"),
        ];

        let ctx = Context::new();
        let state = State::new(&ctx);
        for (input, expected) in cases {
            let actual = spaceless(input, Kwargs::default(), &state);
            assert_eq!(expected, actual);
        }
    }

    #[test]
    fn test_matching() {
        // (value, pattern, expected result)
        let cases = [
            ("abc", "b", true),
            ("abc", "^b$", false),
            ("Hello, World!", r"(?i)(hello\W\sworld\W)", true),
            ("The date was 2018-06-28", r"\d{4}-\d{2}-\d{2}$", true),
        ];

        let ctx = Context::new();
        for (input, pat, expected) in cases {
            // A fresh instance per case: every pattern starts from an empty cache.
            let matching = Matching::default();
            let kwargs = kwargs_of(&[("pat", pat)]);
            let actual = matching.call(input, kwargs, &State::new(&ctx)).unwrap();
            assert_eq!(expected, actual);
        }
    }

    #[test]
    fn test_regex_replace() {
        // One shared instance, so later cases run against a populated cache.
        let regex_replace = RegexReplace::default();
        let ctx = Context::new();
        let state = State::new(&ctx);

        // (pattern, rep, input, expected output)
        let cases = [
            // Basic replacement with capture groups
            (
                r"(?P<last>[^,\s]+),\s+(?P<first>\S+)",
                "$first $last",
                "Springsteen, Bruce",
                "Bruce Springsteen",
            ),
            // Simple replacement
            (r"\d+", "X", "abc123def456", "abcXdefX"),
            // No match returns original
            (r"zzz", "X", "hello world", "hello world"),
        ];

        for (pattern, rep, input, expected) in cases {
            let kwargs = kwargs_of(&[("pattern", pattern), ("rep", rep)]);
            let result = regex_replace.call(input, kwargs, &state).unwrap();
            assert_eq!(result, expected);
        }
    }

    #[test]
    fn test_regex_replace_invalid_pattern() {
        let regex_replace = RegexReplace::default();
        let ctx = Context::new();
        let state = State::new(&ctx);

        let kwargs = kwargs_of(&[("pattern", r"[invalid"), ("rep", "X")]);
        let result = regex_replace.call("test", kwargs, &state);
        assert!(result.is_err());
    }

    /// Compile-time check that every item has a signature the engine accepts.
    #[test]
    fn test_register() {
        let mut tera = plf::Tera::default();
        tera.register_filter("striptags", striptags);
        tera.register_filter("spaceless", spaceless);
        tera.register_filter("regex_replace", RegexReplace::default());
        tera.register_test("matching", Matching::default());
    }
}
231        tera.register_filter("spaceless", spaceless);
232        tera.register_filter("regex_replace", RegexReplace::default());
233        tera.register_test("matching", Matching::default());
234    }
235}