xml_no_std/
escape.rs

//! Contains functions for escaping XML special characters.
2extern crate alloc;
3
4use alloc::borrow::Cow;
5use alloc::string::ToString;
6use core::marker::PhantomData;
7use core::fmt::{Display, Result, Formatter};
8
/// A table of byte-level escapes.
///
/// Implementors only have to provide [`Escapes::escape`]; both predicates have default
/// implementations that are derived from it.
pub(crate) trait Escapes {
    /// Returns the replacement text for the byte `c`, or `None` if `c` has no replacement.
    fn escape(c: u8) -> Option<&'static str>;

    /// Returns `true` if [`Escapes::escape`] yields a replacement for the byte `c`.
    fn byte_needs_escaping(c: u8) -> bool {
        Self::escape(c).is_some()
    }

    /// Returns `true` if at least one byte of `s` has a replacement.
    fn str_needs_escaping(s: &str) -> bool {
        s.bytes().map(Self::escape).any(|replacement| replacement.is_some())
    }
}
20
21pub(crate) struct Escaped<'a, E: Escapes> {
22    _escape_phantom: PhantomData<E>,
23    to_escape: &'a str,
24}
25
26impl<'a, E: Escapes> Escaped<'a, E> {
27    pub const fn new(s: &'a str) -> Self {
28        Escaped {
29            _escape_phantom: PhantomData,
30            to_escape: s,
31        }
32    }
33}
34
35impl<E: Escapes> Display for Escaped<'_, E> {
36    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
37        let mut total_remaining = self.to_escape;
38
39        // find the next occurence
40        while let Some(n) = total_remaining
41            .bytes()
42            .position(E::byte_needs_escaping)
43        {
44            let (start, remaining) = total_remaining.split_at(n);
45
46            f.write_str(start)?;
47
48            // unwrap is safe because we checked is_some for position n earlier
49            let next_byte = remaining.bytes().next().unwrap();
50            let replacement = E::escape(next_byte).unwrap_or("unexpected token");
51            f.write_str(replacement)?;
52
53            total_remaining = &remaining[1..];
54        }
55
56        f.write_str(total_remaining)
57    }
58}
59
60fn escape_str<E: Escapes>(s: &str) -> Cow<'_, str> {
61    if E::str_needs_escaping(s) {
62        Cow::Owned(Escaped::<E>::new(s).to_string())
63    } else {
64        Cow::Borrowed(s)
65    }
66}
67
/// Declares a unit struct `$name` and implements [`Escapes`] for it from a list of
/// `byte => replacement` pairs (a trailing comma is accepted).
macro_rules! escapes {
    {
        $name: ident,
        $($k: expr => $v: expr),* $(,)?
    } => {
        pub(crate) struct $name;

        impl Escapes for $name {
            fn escape(c: u8) -> Option<&'static str> {
                match c {
                    // One arm per `byte => replacement` pair passed to the macro.
                    $( $k => Some($v),)*
                    // Bytes that were not listed have no replacement.
                    _ => None
                }
            }
        }
    };
}
85
// Escape table used by `escape_str_attribute`: the markup characters `<`, `>` and `&`, both
// quote characters, and the line-break bytes `\n` / `\r`, which are written as character
// references.
escapes!(
    AttributeEscapes,
    b'<'  => "&lt;",
    b'>'  => "&gt;",
    b'"'  => "&quot;",
    b'\'' => "&apos;",
    b'&'  => "&amp;",
    b'\n' => "&#xA;",
    b'\r' => "&#xD;",
);
96
// Escape table used by `escape_str_pcdata`: only `<`, `>` and `&` are replaced; quotes and
// line breaks are left untouched.
// NOTE(review): `>` is presumably included so that `]]>` can never appear literally in
// character data — confirm against the XML specification.
escapes!(
    PcDataEscapes,
    b'<' => "&lt;",
    b'>' => "&gt;",
    b'&' => "&amp;",
);
103
/// Performs escaping of common XML characters inside an attribute value.
///
/// This function replaces several important markup characters with their
/// entity equivalents:
///
/// * `<` → `&lt;`
/// * `>` → `&gt;`
/// * `"` → `&quot;`
/// * `'` → `&apos;`
/// * `&` → `&amp;`
///
/// The following characters are escaped so that attributes are printed on
/// a single line:
///
/// * `\n` → `&#xA;`
/// * `\r` → `&#xD;`
///
/// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
///
/// Does not perform allocations if the given string does not contain escapable characters:
/// the input is then returned as [`Cow::Borrowed`]; otherwise the escaped text is returned
/// as [`Cow::Owned`].
#[inline]
#[must_use]
pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
    escape_str::<AttributeEscapes>(s)
}
128
/// Performs escaping of common XML characters inside PCDATA.
///
/// This function replaces several important markup characters with their
/// entity equivalents:
///
/// * `<` → `&lt;`
/// * `>` → `&gt;`
/// * `&` → `&amp;`
///
/// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
///
/// Does not perform allocations if the given string does not contain escapable characters:
/// the input is then returned as [`Cow::Borrowed`]; otherwise the escaped text is returned
/// as [`Cow::Owned`].
#[inline]
#[must_use]
pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
    escape_str::<PcDataEscapes>(s)
}
145
#[cfg(test)]
mod tests {
    use super::{escape_str_attribute, escape_str_pcdata};

    // Every byte in the attribute table is replaced; clean input comes back unchanged.
    #[test]
    fn test_escape_str_attribute() {
        assert_eq!(escape_str_attribute("<>'\"&\n\r"), "&lt;&gt;&apos;&quot;&amp;&#xA;&#xD;");
        assert_eq!(escape_str_attribute("no_escapes"), "no_escapes");
    }

    // Every byte in the PCDATA table is replaced; clean input comes back unchanged.
    #[test]
    fn test_escape_str_pcdata() {
        assert_eq!(escape_str_pcdata("<>&"), "&lt;&gt;&amp;");
        assert_eq!(escape_str_pcdata("no_escapes"), "no_escapes");
    }

    // A multi-byte character directly before an escaped byte must survive intact.
    #[test]
    fn test_escape_multibyte_code_points() {
        assert_eq!(escape_str_attribute("☃<"), "☃&lt;");
        assert_eq!(escape_str_pcdata("☃<"), "☃&lt;");
    }
}
167}