liquid_lib/stdlib/filters/
html.rs

1use liquid_core::Result;
2use liquid_core::Runtime;
3use liquid_core::{Display_filter, Filter, FilterReflection, ParseFilter};
4use liquid_core::{Value, ValueView};
5use regex::Regex;
6
/// Returns the byte length of the entity remainder that `text` starts with.
///
/// `text` is expected to be whatever follows an `&`. If it begins with the tail
/// of one of the entities this module emits (`lt;`, `gt;`, `#39;`, `quot;`,
/// `amp;`), the length of that tail is returned; otherwise the result is `0`.
fn nr_escaped(text: &str) -> usize {
    const ENTITY_TAILS: [&str; 5] = ["lt;", "gt;", "#39;", "quot;", "amp;"];
    ENTITY_TAILS
        .iter()
        .find(|tail| text.starts_with(**tail))
        .map_or(0, |tail| tail.len())
}
16
17// The code is adapted from
18// https://github.com/rust-lang/rust/blob/master/src/librustdoc/html/escape.rs
19// Retrieved 2016-11-19.
20fn escape(input: &dyn ValueView, once_p: bool) -> Result<Value> {
21    if input.is_nil() {
22        return Ok(Value::Nil);
23    }
24    let s = input.to_kstr();
25    let mut result = String::new();
26    let mut last = 0;
27    let mut skip = 0;
28    for (i, c) in s.char_indices() {
29        if skip > 0 {
30            skip -= 1;
31            continue;
32        }
33        match c {
34            '<' | '>' | '\'' | '"' | '&' => {
35                result.push_str(&s[last..i]);
36                last = i + 1;
37                let escaped = match c {
38                    '<' => "&lt;",
39                    '>' => "&gt;",
40                    '\'' => "&#39;",
41                    '"' => "&quot;",
42                    '&' => {
43                        if once_p {
44                            skip = nr_escaped(&s[last..]);
45                        }
46                        if skip == 0 {
47                            "&amp;"
48                        } else {
49                            "&"
50                        }
51                    }
52                    _ => unreachable!(),
53                };
54                result.push_str(escaped);
55            }
56            _ => {}
57        }
58    }
59    if last < s.len() {
60        result.push_str(&s[last..]);
61    }
62    Ok(Value::scalar(result))
63}
64
/// Public handle for the `escape` filter.
///
/// The `filter` attribute carries the filter's name and description and names
/// `EscapeFilter` as the type it is parsed into.
#[derive(Clone, ParseFilter, FilterReflection)]
#[filter(
    name = "escape",
    description = "Escapes a string by replacing characters with escape sequences.",
    parsed(EscapeFilter)
)]
pub struct Escape;
72
/// Parsed form of the `escape` filter; a unit struct, so it holds no state.
#[derive(Debug, Default, Display_filter)]
#[name = "escape"]
struct EscapeFilter;
76
impl Filter for EscapeFilter {
    /// Escapes every `<`, `>`, `'`, `"` and `&` in `input`.
    ///
    /// Ampersands are always rewritten, even when they already start an entity,
    /// because `escape` is called with `once_p = false`. The runtime is unused.
    fn evaluate(&self, input: &dyn ValueView, _runtime: &dyn Runtime) -> Result<Value> {
        escape(input, false)
    }
}
82
/// Public handle for the `escape_once` filter.
///
/// The `filter` attribute carries the filter's name and description and names
/// `EscapeOnceFilter` as the type it is parsed into.
#[derive(Clone, ParseFilter, FilterReflection)]
#[filter(
    name = "escape_once",
    description = "Escapes a string without changing existing escaped entities.",
    parsed(EscapeOnceFilter)
)]
pub struct EscapeOnce;
90
/// Parsed form of the `escape_once` filter; a unit struct, so it holds no state.
#[derive(Debug, Default, Display_filter)]
#[name = "escape_once"]
struct EscapeOnceFilter;
94
impl Filter for EscapeOnceFilter {
    /// Escapes `<`, `>`, `'`, `"` and `&` in `input`, leaving an `&` alone when
    /// it already starts `&lt;`, `&gt;`, `&#39;`, `&quot;` or `&amp;`.
    ///
    /// Delegates to `escape` with `once_p = true`. The runtime is unused.
    fn evaluate(&self, input: &dyn ValueView, _runtime: &dyn Runtime) -> Result<Value> {
        escape(input, true)
    }
}
100
/// Public handle for the `strip_html` filter.
///
/// The `filter` attribute carries the filter's name and description and names
/// `StripHtmlFilter` as the type it is parsed into.
#[derive(Clone, ParseFilter, FilterReflection)]
#[filter(
    name = "strip_html",
    description = "Removes any HTML tags from a string.",
    parsed(StripHtmlFilter)
)]
pub struct StripHtml;
108
/// Parsed form of the `strip_html` filter; a unit struct, so it holds no state.
#[derive(Debug, Default, Display_filter)]
#[name = "strip_html"]
struct StripHtmlFilter;
112
113static MATCHERS: std::sync::LazyLock<[Regex; 4]> = std::sync::LazyLock::new(|| {
114    [
115        Regex::new(r"(?is)<script.*?</script>").unwrap(),
116        Regex::new(r"(?is)<style.*?</style>").unwrap(),
117        Regex::new(r"(?is)<!--.*?-->").unwrap(),
118        Regex::new(r"(?is)<.*?>").unwrap(),
119    ]
120});
121
122impl Filter for StripHtmlFilter {
123    fn evaluate(&self, input: &dyn ValueView, _runtime: &dyn Runtime) -> Result<Value> {
124        let input = input.to_kstr().into_string();
125
126        let result = MATCHERS.iter().fold(input, |acc, matcher| {
127            matcher.replace_all(&acc, "").into_owned()
128        });
129        Ok(Value::scalar(result))
130    }
131}
132
/// Public handle for the `newline_to_br` filter.
///
/// The `filter` attribute carries the filter's name and description and names
/// `NewlineToBrFilter` as the type it is parsed into.
#[derive(Clone, ParseFilter, FilterReflection)]
#[filter(
    name = "newline_to_br",
    description = "Replaces every newline (`\\n`) with an HTML line break (`<br>`).",
    parsed(NewlineToBrFilter)
)]
pub struct NewlineToBr;
140
/// Parsed form of the `newline_to_br` filter; a unit struct, so it holds no state.
#[derive(Debug, Default, Display_filter)]
#[name = "newline_to_br"]
struct NewlineToBrFilter;
144
145impl Filter for NewlineToBrFilter {
146    fn evaluate(&self, input: &dyn ValueView, _runtime: &dyn Runtime) -> Result<Value> {
147        // TODO handle windows line endings
148        let input = input.to_kstr();
149        Ok(Value::scalar(input.replace('\n', "<br />\n")))
150    }
151}
152
// Unit tests that run each filter through `liquid_core::call_filter!` and compare
// the result with an expected `liquid_core::value!`.
#[cfg(test)]
mod tests {
    use super::*;

    // Quotes and ampersands are escaped; text without special characters is unchanged.
    #[test]
    fn unit_escape() {
        assert_eq!(
            liquid_core::call_filter!(Escape, "Have you read 'James & the Giant Peach'?").unwrap(),
            liquid_core::value!("Have you read &#39;James &amp; the Giant Peach&#39;?")
        );
        assert_eq!(
            liquid_core::call_filter!(Escape, "Tetsuro Takara").unwrap(),
            liquid_core::value!("Tetsuro Takara")
        );
    }

    // Multi-byte characters around escaped ones must come through intact.
    #[test]
    fn unit_escape_non_ascii() {
        assert_eq!(
            liquid_core::call_filter!(Escape, "word¹ <br> word¹").unwrap(),
            liquid_core::value!("word¹ &lt;br&gt; word¹")
        );
    }

    // Already-escaped known entities are left alone; the unknown `&xyz;` still
    // has its ampersand escaped.
    #[test]
    fn unit_escape_once() {
        assert_eq!(
            liquid_core::call_filter!(EscapeOnce, "1 < 2 & 3").unwrap(),
            liquid_core::value!("1 &lt; 2 &amp; 3")
        );
        assert_eq!(
            liquid_core::call_filter!(EscapeOnce, "1 &lt; 2 &amp; 3").unwrap(),
            liquid_core::value!("1 &lt; 2 &amp; 3")
        );
        assert_eq!(
            liquid_core::call_filter!(EscapeOnce, "&lt;&gt;&amp;&#39;&quot;&xyz;").unwrap(),
            liquid_core::value!("&lt;&gt;&amp;&#39;&quot;&amp;xyz;")
        );
    }

    // Covers script/style elements (either letter case), plain and attributed
    // tags, tags and comments spanning several lines, and empty input.
    #[test]
    fn unit_strip_html() {
        assert_eq!(
            liquid_core::call_filter!(
                StripHtml,
                "<script type=\"text/javascript\">alert('Hi!';</script>",
            )
            .unwrap(),
            liquid_core::value!("")
        );
        assert_eq!(
            liquid_core::call_filter!(
                StripHtml,
                "<SCRIPT type=\"text/javascript\">alert('Hi!';</SCRIPT>",
            )
            .unwrap(),
            liquid_core::value!("")
        );
        assert_eq!(
            liquid_core::call_filter!(StripHtml, "<p>test</p>").unwrap(),
            liquid_core::value!("test")
        );
        assert_eq!(
            liquid_core::call_filter!(StripHtml, "<p id='xxx'>test</p>").unwrap(),
            liquid_core::value!("test")
        );
        assert_eq!(
            liquid_core::call_filter!(StripHtml, "<style type=\"text/css\">cool style</style>",)
                .unwrap(),
            liquid_core::value!("")
        );
        assert_eq!(
            liquid_core::call_filter!(StripHtml, "<p\nclass='loooong'>test</p>").unwrap(),
            liquid_core::value!("test")
        );
        assert_eq!(
            liquid_core::call_filter!(StripHtml, "<!--\n\tcomment\n-->test").unwrap(),
            liquid_core::value!("test")
        );
        assert_eq!(
            liquid_core::call_filter!(StripHtml, "").unwrap(),
            liquid_core::value!("")
        );
    }

    // A single interior newline gets a `<br />` inserted in front of it.
    #[test]
    fn unit_newline_to_br() {
        assert_eq!(
            liquid_core::call_filter!(NewlineToBr, "a\nb").unwrap(),
            liquid_core::value!("a<br />\nb")
        );
    }

    // Leading and trailing newlines are converted as well.
    #[test]
    fn unit_newline_to_br_hello_world() {
        // First example from https://shopify.github.io/liquid/filters/newline_to_br/
        assert_eq!(
            liquid_core::call_filter!(NewlineToBr, "\nHello\nWorld\n").unwrap(),
            liquid_core::value!("<br />\nHello<br />\nWorld<br />\n")
        );
    }

    // Passing an argument to `newline_to_br` must be reported as an error.
    #[test]
    fn unit_newline_to_br_one_argument() {
        liquid_core::call_filter!(NewlineToBr, "a\nb", 0f64).unwrap_err();
    }
}