minijinja/
utils.rs

1use std::char::decode_utf16;
2use std::collections::BTreeMap;
3use std::fmt;
4use std::iter::{once, repeat};
5use std::str::Chars;
6
7use crate::error::{Error, ErrorKind};
8use crate::value::{StringType, Value, ValueIter, ValueKind, ValueRepr};
9use crate::Output;
10
/// Internal marker type used to seal up some trait methods.
///
/// NOTE(review): presumably trait methods take this type as an argument so that
/// code which cannot name it is unable to call or override them -- confirm at the
/// use sites, which are not visible in this file.
pub struct SealedMarker;
13
/// Returns the index of the first byte in `haystack` equal to `needle`.
///
/// Returns `None` if the byte does not occur (which includes an empty haystack).
pub fn memchr(haystack: &[u8], needle: u8) -> Option<usize> {
    for (offset, &byte) in haystack.iter().enumerate() {
        if byte == needle {
            return Some(offset);
        }
    }
    None
}
17
/// Returns the index of the first occurrence of `needle` within `haystack`.
///
/// An empty `needle` matches at offset `0` (the same convention as `str::find`).
/// Returns `None` if `needle` does not occur, including when it is longer than
/// `haystack`.
pub fn memstr(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // `slice::windows` panics when asked for windows of size zero, so the empty
    // needle has to be answered before reaching it.
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
23
/// Helper for dealing with untrusted size hints.
///
/// Caps `value` at 1024 so that a bogus, very large hint cannot be used as-is
/// (for instance to size a pre-allocation).  Smaller values pass through unchanged.
#[inline(always)]
pub(crate) fn untrusted_size_hint(value: usize) -> usize {
    const MAX_TRUSTED_HINT: usize = 1024;
    if value > MAX_TRUSTED_HINT {
        MAX_TRUSTED_HINT
    } else {
        value
    }
}
29
30fn write_with_html_escaping(out: &mut Output, value: &Value) -> fmt::Result {
31    if matches!(
32        value.kind(),
33        ValueKind::Undefined | ValueKind::None | ValueKind::Bool | ValueKind::Number
34    ) {
35        write!(out, "{value}")
36    } else if let Some(s) = value.as_str() {
37        write!(out, "{}", HtmlEscape(s))
38    } else {
39        write!(out, "{}", HtmlEscape(&value.to_string()))
40    }
41}
42
43fn invalid_autoescape(name: &str) -> Result<(), Error> {
44    Err(Error::new(
45        ErrorKind::InvalidOperation,
46        format!("Default formatter does not know how to format to custom format '{name}'"),
47    ))
48}
49
50#[inline(always)]
51pub fn write_escaped(
52    out: &mut Output,
53    auto_escape: AutoEscape,
54    value: &Value,
55) -> Result<(), Error> {
56    // common case of safe strings or strings without auto escaping
57    if let ValueRepr::String(ref s, ty) = value.0 {
58        if matches!(ty, StringType::Safe) || matches!(auto_escape, AutoEscape::None) {
59            return out.write_str(s).map_err(Error::from);
60        }
61    }
62
63    match auto_escape {
64        AutoEscape::None => write!(out, "{value}").map_err(Error::from),
65        AutoEscape::Html => write_with_html_escaping(out, value).map_err(Error::from),
66        #[cfg(feature = "json")]
67        AutoEscape::Json => {
68            let value = ok!(serde_json::to_string(&value).map_err(|err| {
69                Error::new(ErrorKind::BadSerialization, "unable to format to JSON").with_source(err)
70            }));
71            write!(out, "{value}").map_err(Error::from)
72        }
73        AutoEscape::Custom(name) => invalid_autoescape(name),
74    }
75}
76
/// Controls the autoescaping behavior.
///
/// The selected mode decides how values are written when they are printed.
/// Strings that are marked as safe are written verbatim regardless of the mode.
///
/// For more information see
/// [`set_auto_escape_callback`](crate::Environment::set_auto_escape_callback).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum AutoEscape {
    /// Do not apply auto escaping.
    ///
    /// Values are written with their plain display formatting.
    None,
    /// Use HTML auto escaping rules.
    ///
    /// Any value will be converted into a string and the following characters
    /// will be escaped in ways compatible to XML and HTML: `<`, `>`, `&`, `"`,
    /// `'`, and `/`.  Undefined, none, boolean and numeric values are written
    /// without going through the escaper.
    Html,
    /// Use escaping rules suitable for JSON/JavaScript or YAML.
    ///
    /// Any value effectively ends up being serialized to JSON upon printing.  The
    /// serialized values will be compatible with JavaScript and YAML as well.
    /// Printing fails if the value cannot be serialized.
    #[cfg(feature = "json")]
    #[cfg_attr(docsrs, doc(cfg(feature = "json")))]
    Json,
    /// A custom auto escape format.
    ///
    /// The default formatter does not know how to deal with a custom escaping
    /// format and would error.  The use of these requires a custom formatter.
    /// See [`set_formatter`](crate::Environment::set_formatter).
    Custom(&'static str),
}
106
/// Defines the behavior of undefined values in the engine.
///
/// At present there are three types of behaviors available which mirror the behaviors
/// that Jinja2 provides out of the box.  [`Lenient`](Self::Lenient) is the default.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum UndefinedBehavior {
    /// The default, somewhat lenient undefined behavior.
    ///
    /// * **printing:** allowed (returns empty string)
    /// * **iteration:** allowed (returns empty array)
    /// * **attribute access of undefined values:** fails
    Lenient,
    /// Like `Lenient`, but also allows chaining of undefined lookups.
    ///
    /// * **printing:** allowed (returns empty string)
    /// * **iteration:** allowed (returns empty array)
    /// * **attribute access of undefined values:** allowed (returns [`undefined`](Value::UNDEFINED))
    Chainable,
    /// Complains very quickly about undefined values.
    ///
    /// Truthiness checks of undefined values fail in this mode as well.
    ///
    /// * **printing:** fails
    /// * **iteration:** fails
    /// * **attribute access of undefined values:** fails
    Strict,
}
133
134impl Default for UndefinedBehavior {
135    fn default() -> UndefinedBehavior {
136        UndefinedBehavior::Lenient
137    }
138}
139
140impl UndefinedBehavior {
141    /// Utility method used in the engine to determine what to do when an undefined is
142    /// encountered.
143    ///
144    /// The flag indicates if this is the first or second level of undefined value.  If
145    /// `parent_was_undefined` is set to `true`, the undefined was created by looking up
146    /// a missing attribute on an undefined value.  If `false` the undefined was created by
147    /// looking up a missing attribute on a defined value.
148    pub(crate) fn handle_undefined(self, parent_was_undefined: bool) -> Result<Value, Error> {
149        match (self, parent_was_undefined) {
150            (UndefinedBehavior::Lenient, false)
151            | (UndefinedBehavior::Strict, false)
152            | (UndefinedBehavior::Chainable, _) => Ok(Value::UNDEFINED),
153            (UndefinedBehavior::Lenient, true) | (UndefinedBehavior::Strict, true) => {
154                Err(Error::from(ErrorKind::UndefinedError))
155            }
156        }
157    }
158
159    /// Utility method to check if something is true.
160    ///
161    /// This fails only for strict undefined values.
162    #[inline]
163    pub(crate) fn is_true(self, value: &Value) -> Result<bool, Error> {
164        if matches!(self, UndefinedBehavior::Strict) && value.is_undefined() {
165            Err(Error::from(ErrorKind::UndefinedError))
166        } else {
167            Ok(value.is_true())
168        }
169    }
170
171    /// Tries to iterate over a value while handling the undefined value.
172    ///
173    /// If the value is undefined, then iteration fails if the behavior is set to strict,
174    /// otherwise it succeeds with an empty iteration.  This is also internally used in the
175    /// engine to convert values to lists.
176    #[inline]
177    pub(crate) fn try_iter(self, value: Value) -> Result<ValueIter, Error> {
178        self.assert_iterable(&value).and_then(|_| value.try_iter())
179    }
180
181    /// Are we strict on iteration?
182    #[inline]
183    pub(crate) fn assert_iterable(self, value: &Value) -> Result<(), Error> {
184        if matches!(self, UndefinedBehavior::Strict) && value.is_undefined() {
185            Err(Error::from(ErrorKind::UndefinedError))
186        } else {
187            Ok(())
188        }
189    }
190}
191
192/// Helper to HTML escape a string.
193pub struct HtmlEscape<'a>(pub &'a str);
194
195impl<'a> fmt::Display for HtmlEscape<'a> {
196    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
197        #[cfg(feature = "v_htmlescape")]
198        {
199            fmt::Display::fmt(&v_htmlescape::escape(self.0), f)
200        }
201        // this is taken from askama-escape
202        #[cfg(not(feature = "v_htmlescape"))]
203        {
204            let bytes = self.0.as_bytes();
205            let mut start = 0;
206
207            for (i, b) in bytes.iter().enumerate() {
208                macro_rules! escaping_body {
209                    ($quote:expr) => {{
210                        if start < i {
211                            // SAFETY: this is safe because we only push valid utf-8 bytes over
212                            ok!(f.write_str(unsafe {
213                                std::str::from_utf8_unchecked(&bytes[start..i])
214                            }));
215                        }
216                        ok!(f.write_str($quote));
217                        start = i + 1;
218                    }};
219                }
220                if b.wrapping_sub(b'"') <= b'>' - b'"' {
221                    match *b {
222                        b'<' => escaping_body!("&lt;"),
223                        b'>' => escaping_body!("&gt;"),
224                        b'&' => escaping_body!("&amp;"),
225                        b'"' => escaping_body!("&quot;"),
226                        b'\'' => escaping_body!("&#x27;"),
227                        b'/' => escaping_body!("&#x2f;"),
228                        _ => (),
229                    }
230                }
231            }
232
233            if start < bytes.len() {
234                // SAFETY: this is safe because we only push valid utf-8 bytes over
235                f.write_str(unsafe { std::str::from_utf8_unchecked(&bytes[start..]) })
236            } else {
237                Ok(())
238            }
239        }
240    }
241}
242
243struct Unescaper {
244    out: String,
245    pending_surrogate: u16,
246}
247
248impl Unescaper {
249    fn unescape(mut self, s: &str) -> Result<String, Error> {
250        let mut char_iter = s.chars();
251
252        while let Some(c) = char_iter.next() {
253            if c == '\\' {
254                match char_iter.next() {
255                    None => return Err(ErrorKind::BadEscape.into()),
256                    Some(d) => match d {
257                        '"' | '\\' | '/' | '\'' => ok!(self.push_char(d)),
258                        'b' => ok!(self.push_char('\x08')),
259                        'f' => ok!(self.push_char('\x0C')),
260                        'n' => ok!(self.push_char('\n')),
261                        'r' => ok!(self.push_char('\r')),
262                        't' => ok!(self.push_char('\t')),
263                        'u' => {
264                            let val = ok!(self.parse_u16(&mut char_iter));
265                            ok!(self.push_u16(val));
266                        }
267                        _ => return Err(ErrorKind::BadEscape.into()),
268                    },
269                }
270            } else {
271                ok!(self.push_char(c));
272            }
273        }
274
275        if self.pending_surrogate != 0 {
276            Err(ErrorKind::BadEscape.into())
277        } else {
278            Ok(self.out)
279        }
280    }
281
282    fn parse_u16(&self, chars: &mut Chars) -> Result<u16, Error> {
283        let hexnum = chars.chain(repeat('\0')).take(4).collect::<String>();
284        u16::from_str_radix(&hexnum, 16).map_err(|_| ErrorKind::BadEscape.into())
285    }
286
287    fn push_u16(&mut self, c: u16) -> Result<(), Error> {
288        match (self.pending_surrogate, (0xD800..=0xDFFF).contains(&c)) {
289            (0, false) => match decode_utf16(once(c)).next() {
290                Some(Ok(c)) => self.out.push(c),
291                _ => return Err(ErrorKind::BadEscape.into()),
292            },
293            (_, false) => return Err(ErrorKind::BadEscape.into()),
294            (0, true) => self.pending_surrogate = c,
295            (prev, true) => match decode_utf16(once(prev).chain(once(c))).next() {
296                Some(Ok(c)) => {
297                    self.out.push(c);
298                    self.pending_surrogate = 0;
299                }
300                _ => return Err(ErrorKind::BadEscape.into()),
301            },
302        }
303        Ok(())
304    }
305
306    fn push_char(&mut self, c: char) -> Result<(), Error> {
307        if self.pending_surrogate != 0 {
308            Err(ErrorKind::BadEscape.into())
309        } else {
310            self.out.push(c);
311            Ok(())
312        }
313    }
314}
315
316/// Un-escape a string, following JSON rules.
317pub fn unescape(s: &str) -> Result<String, Error> {
318    Unescaper {
319        out: String::new(),
320        pending_surrogate: 0,
321    }
322    .unescape(s)
323}
324
/// Debug helper that renders only the keys of a [`BTreeMap`] as a list.
///
/// The values are never formatted, so `V` does not have to implement `Debug`.
pub struct BTreeMapKeysDebug<'a, K: fmt::Debug, V>(pub &'a BTreeMap<K, V>);

impl<'a, K: fmt::Debug, V> fmt::Debug for BTreeMapKeysDebug<'a, K, V> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let map = self.0;
        let mut list = f.debug_list();
        list.entries(map.keys());
        list.finish()
    }
}
332
/// Guard that invokes a closure when it is dropped.
///
/// The closure is kept in an `Option` so that it can be moved out and called by
/// value from within `drop`.
pub struct OnDrop<F: FnOnce()>(Option<F>);

impl<F: FnOnce()> OnDrop<F> {
    /// Creates a guard that calls `f` once the guard goes out of scope.
    pub fn new(f: F) -> Self {
        OnDrop(Some(f))
    }
}

impl<F: FnOnce()> Drop for OnDrop<F> {
    fn drop(&mut self) {
        // the slot is filled by `new` and only emptied here
        let callback = self.0.take().unwrap();
        callback();
    }
}
346
347#[cfg(feature = "builtins")]
/// Splits `s` on runs of whitespace into at most `maxsplits` items.
///
/// Leading whitespace is skipped.  Once the limit is reached, the last item is
/// the unsplit remainder of the string starting at its first non-whitespace
/// character, with any inner and trailing whitespace preserved.  If the limit is
/// not reached, trailing whitespace is dropped and never produces an item.  A
/// `maxsplits` of `0` behaves like `1`.  Empty or all-whitespace input yields
/// nothing.
pub fn splitn_whitespace(s: &str, maxsplits: usize) -> impl Iterator<Item = &str> + '_ {
    // number of items produced so far, counting the one currently being scanned
    let mut splits = 1;
    // true while skipping the whitespace that precedes an item
    let mut skip_ws = true;
    // byte offset at which the item currently being scanned started
    let mut split_start = None;
    let mut chars = s.char_indices();

    std::iter::from_fn(move || {
        for (idx, c) in chars.by_ref() {
            if splits >= maxsplits && !skip_ws {
                // inside the final item: consume everything up to the end
                continue;
            } else if c.is_whitespace() {
                if let Some(old) = split_start.take() {
                    splits += 1;
                    skip_ws = true;
                    return Some(&s[old..idx]);
                }
            } else {
                skip_ws = false;
                if split_start.is_none() {
                    split_start = Some(idx);
                }
            }
        }

        // Only an item that was started but not yet emitted is left to yield.
        // Whitespace after the last emitted item must not become an item of its
        // own, and `take` ensures that later calls keep returning `None`.
        split_start.take().map(|start| &s[start..])
    })
}
386
// Unit tests for the helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    use similar_asserts::assert_eq;

    // All six characters that the escaper replaces are turned into entities.
    #[test]
    fn test_html_escape() {
        let input = "<>&\"'/";
        let output = HtmlEscape(input).to_string();
        assert_eq!(output, "&lt;&gt;&amp;&quot;&#x27;&#x2f;");
    }

    // Covers a `\u` escape, the single character escapes, input without any
    // escapes and a surrogate pair that combines into a single character.
    #[test]
    fn test_unescape() {
        assert_eq!(unescape(r"foo\u2603bar").unwrap(), "foo\u{2603}bar");
        assert_eq!(unescape(r"\t\b\f\r\n\\\/").unwrap(), "\t\x08\x0c\r\n\\/");
        assert_eq!(unescape("foobarbaz").unwrap(), "foobarbaz");
        assert_eq!(unescape(r"\ud83d\udca9").unwrap(), "💩");
    }

    // Checks the item limit, collapsing of whitespace runs, skipping of leading
    // whitespace and that the final item keeps its trailing whitespace.
    #[test]
    #[cfg(feature = "builtins")]
    fn test_splitn_whitespace() {
        // collects the iterator so that it can be compared against a `Vec`
        fn s(s: &str, n: usize) -> Vec<&str> {
            splitn_whitespace(s, n).collect::<Vec<_>>()
        }

        assert_eq!(s("a b c", 1), vec!["a b c"]);
        assert_eq!(s("a b c", 2), vec!["a", "b c"]);
        assert_eq!(s("a    b c", 2), vec!["a", "b c"]);
        assert_eq!(s("a    b c   ", 2), vec!["a", "b c   "]);
        assert_eq!(s("a   b   c", 3), vec!["a", "b", "c"]);
        assert_eq!(s("a   b   c", 4), vec!["a", "b", "c"]);
        assert_eq!(s("   a   b   c", 3), vec!["a", "b", "c"]);
        assert_eq!(s("   a   b   c", 4), vec!["a", "b", "c"]);
    }
}
424}