1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
//! HTML escaping
//!
//! By default sailfish replaces the characters `&"<>` with the equivalent HTML
//! entities (`&amp;`, `&quot;`, `&lt;` and `&gt;`).

mod avx2;
mod fallback;
mod naive;
mod sse2;

use super::buffer::Buffer;

/// Byte-indexed lookup table with one entry for every possible `u8` value.
///
/// Four bytes carry an index into `ESCAPED`:
/// `"` (34) -> 0, `&` (38) -> 1, `<` (60) -> 2, `>` (62) -> 3.
/// Every other byte holds 9, which is not a valid `ESCAPED` index
/// (`ESCAPED_LEN` is 4).
// NOTE(review): nothing in this file reads the table; presumably the backend
// submodules treat 9 as the "no escape needed" marker — confirm there.
static ESCAPE_LUT: [u8; 256] = [
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 2, 9, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9,
];

/// Replacement text for the escaped characters, in the order `"`, `&`, `<`, `>`.
/// Each entry's position equals the index stored for that character in `ESCAPE_LUT`.
const ESCAPED: [&str; 4] = ["&quot;", "&amp;", "&lt;", "&gt;"];
/// Number of entries in `ESCAPED`.
const ESCAPED_LEN: usize = 4;

/// Write the escaped contents of `feed` with a custom function.
///
/// `writer` is passed, together with the bytes of `feed`, to one of the
/// escaping backends:
///
/// * when compiled with `target_feature = "avx2"`, the AVX2 backend is called
///   unconditionally;
/// * otherwise, on x86/x86_64, AVX2 and then SSE2 support is probed at runtime
///   and the first available backend is used, with the portable fallback last;
/// * on every other architecture the portable fallback is called directly.
///
/// This function is soft-deprecated because using this function causes a large binary size.
#[inline]
pub fn escape_with<F: FnMut(&str)>(mut writer: F, feed: &str) {
    let bytes = feed.as_bytes();

    // SAFETY: `bytes` is the byte view of a valid `&str`, and the SIMD
    // backends are only reached when the matching CPU feature is enabled at
    // compile time or has just been detected at runtime.
    // NOTE(review): the backends' full safety contracts are defined in their
    // own modules — confirm nothing beyond the above is required.
    unsafe {
        #[cfg(target_feature = "avx2")]
        {
            avx2::escape(&mut writer, bytes);
        }

        // `is_x86_feature_detected!` only exists for x86/x86_64 targets, so the
        // runtime dispatch has to be compiled out everywhere else.
        // NOTE(review): the `avx2`/`sse2` module declarations may need the same
        // `target_arch` gate for non-x86 builds — verify in those modules.
        #[cfg(all(
            not(target_feature = "avx2"),
            any(target_arch = "x86", target_arch = "x86_64")
        ))]
        {
            if is_x86_feature_detected!("avx2") {
                avx2::escape(&mut writer, bytes);
            } else if is_x86_feature_detected!("sse2") {
                sse2::escape(&mut writer, bytes);
            } else {
                fallback::escape(&mut writer, bytes);
            }
        }

        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
        {
            fallback::escape(&mut writer, bytes);
        }
    }
}

/// Writes the HTML-escaped form of `feed` into `buf`.
///
/// Delegates to `escape_with`, using a sink that forwards every piece it
/// receives to `Buffer::write_str`.
#[doc(hidden)]
pub fn escape_to_buf(feed: &str, buf: &mut Buffer) {
    let sink = |piece: &str| buf.write_str(piece);
    escape_with(sink, feed);
}

/// Write the escaped contents into a `String`.
///
/// The current contents of `s` are moved into a `Buffer`, the escaped form of
/// `feed` is written to that buffer, and the buffer is then converted back
/// into a `String` and stored in `s`.
///
/// # Examples
///
/// ```
/// use sailfish::runtime::escape::escape_to_string;
///
/// let mut buf = String::new();
/// escape_to_string("<h1>Hello, world!</h1>", &mut buf);
/// assert_eq!(buf, "&lt;h1&gt;Hello, world!&lt;/h1&gt;");
/// ```
#[inline]
pub fn escape_to_string(feed: &str, s: &mut String) {
    // Take ownership of the caller's string, leaving an empty one in its
    // place while the buffer is being written to.
    let mut buf = Buffer::from(std::mem::take(s));
    escape_to_buf(feed, &mut buf);
    // Hand the finished text back to the caller.
    *s = buf.into_string();
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Escapes `feed` into a fresh `Buffer` and returns the result as a `String`.
    fn escape(feed: &str) -> String {
        let mut buf = Buffer::new();
        escape_to_buf(feed, &mut buf);
        buf.into_string()
    }

    /// Inputs containing none of `"`, `&`, `<`, `>` must come back unchanged.
    #[test]
    fn noescape() {
        assert_eq!(escape(""), "");
        assert_eq!(
            escape("abcdefghijklmnopqrstrvwxyz"),
            "abcdefghijklmnopqrstrvwxyz"
        );
        assert_eq!(escape("!#$%()*+,-.:;=?_^"), "!#$%()*+,-.:;=?_^");
        assert_eq!(
            escape("漢字はエスケープしないはずだよ"),
            "漢字はエスケープしないはずだよ"
        );
    }

    /// Short inputs that contain one or more of the escaped characters.
    #[test]
    fn escape_short() {
        assert_eq!(escape("<"), "&lt;");
        assert_eq!(escape("\"&<>"), "&quot;&amp;&lt;&gt;");
        assert_eq!(
            escape("{\"title\": \"This is a JSON!\"}"),
            "{&quot;title&quot;: &quot;This is a JSON!&quot;}"
        );
        assert_eq!(
            escape("<html><body><h1>Hello, world</h1></body></html>"),
            "&lt;html&gt;&lt;body&gt;&lt;h1&gt;Hello, world&lt;/h1&gt;\
            &lt;/body&gt;&lt;/html&gt;"
        );
    }

    /// A long input mixing escaped and unescaped characters.
    #[test]
    #[rustfmt::skip]
    fn escape_long() {
        assert_eq!(
            escape(r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r;o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqv<za&~r^]"{t4by2t`<q4bfo^&!so5/~(nxk:7l\;#0w41u~w3i$g|>e/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""###),
            r###"m{jml&amp;,?6&gt;\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k&quot;@p4$~?;!;pn_l8v.&quot;ki`%/&amp;^=\[y+qcerr`@3*|?du.\0vd#40.&gt;bcpf\u@m|c&lt;2t7`hk)^?&quot;0u{v%9}4y2hhv?%-f`&lt;;rzwx`7}l(j2b:c\&lt;|z&amp;$x{+k;f`0+w3e0\m.wmdli&gt;94e2hp\$}j0&amp;m(*h$/lwlj#}99r;o.kj@1#}~v+;y~b[~m.eci}&amp;l7fxt`\\{~#k*9z/d{}(.^j}[(,]:&lt;\h]9k2+0*w60/|23~5;/!-h&amp;ci*~e1h~+:1lhh\&gt;y_*&gt;:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*&lt;{6!&lt;(_|&lt;#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&amp;jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t&gt;@o*&quot;$]799r=+)t&gt;co?rvgk%u0c@.9os;#t_*/gqv&lt;za&amp;~r^]&quot;{t4by2t`&lt;q4bfo^&amp;!so5/~(nxk:7l\;#0w41u~w3i$g|&gt;e/t;o&lt;*`~?3.jyx+h)+^cn^j4td|&gt;)~rs)vm#]:&quot;&amp;\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo&quot;chyat3k#7pq1u=.l]c14twa4tg#5k_&quot;"###
        );
    }

    /// Compares `escape_to_buf` against `naive::escape` on pseudo-random
    /// ASCII inputs of every length from 0 to 99.
    #[test]
    fn random() {
        const ASCII_CHARS: &[u8] = br##"abcdefghijklmnopqrstuvwxyz0123456789-^\@[;:],./\!"#$%&'()~=~|`{+*}<>?_"##;
        // Fixed seed so the generated inputs are the same on every run.
        let mut state = 88172645463325252u64;
        let mut data = Vec::with_capacity(100);
        let mut buf1 = Buffer::new();
        let mut buf2 = Buffer::new();

        for len in 0..100 {
            data.clear();
            for _ in 0..len {
                // xorshift
                state ^= state << 13;
                state ^= state >> 7;
                state ^= state << 17;

                let idx = state as usize % ASCII_CHARS.len();
                data.push(ASCII_CHARS[idx]);
            }

            // Every byte comes from `ASCII_CHARS`, so the checked conversion
            // cannot fail and no `unsafe` is needed here.
            let s = std::str::from_utf8(&data).expect("ASCII_CHARS contains only ASCII bytes");

            buf1.clear();
            buf2.clear();

            escape_to_buf(s, &mut buf1);

            // SAFETY: all three pointers are derived from `s`; the last one is
            // the one-past-the-end pointer of the same string.
            // NOTE(review): assumes the arguments are (writer, start, current,
            // end) — confirm against `naive::escape`.
            unsafe {
                naive::escape(
                    &mut |chunk| buf2.write_str(chunk),
                    s.as_ptr(),
                    s.as_ptr(),
                    s.as_ptr().add(s.len()),
                );
            }

            assert_eq!(buf1.as_str(), buf2.as_str());
        }
    }
}