lol_html/base/
encoding.rs

1use crate::rewriter::AsciiCompatibleEncoding;
2use encoding_rs::Encoding;
3use std::sync::atomic::{AtomicUsize, Ordering};
4use std::sync::Arc;
5
/// Lookup table mapping a small integer index to an [`Encoding`].
///
/// [`SharedEncoding`] stores a position in this table inside an atomic integer
/// instead of storing the encoding reference itself, which allows more efficient
/// sets/gets of the [`SharedEncoding`].
///
/// The order of the entries is significant: index `0` (UTF-8) is the value
/// [`SharedEncoding::get`] falls back to when the stored index is out of range.
static ALL_ENCODINGS: [&Encoding; 40] = [
    &encoding_rs::UTF_8_INIT,
    &encoding_rs::SHIFT_JIS_INIT,
    &encoding_rs::BIG5_INIT,
    &encoding_rs::EUC_JP_INIT,
    &encoding_rs::EUC_KR_INIT,
    &encoding_rs::GB18030_INIT,
    &encoding_rs::GBK_INIT,
    &encoding_rs::IBM866_INIT,
    &encoding_rs::ISO_8859_2_INIT,
    &encoding_rs::ISO_8859_3_INIT,
    &encoding_rs::ISO_8859_4_INIT,
    &encoding_rs::ISO_8859_5_INIT,
    &encoding_rs::ISO_8859_6_INIT,
    &encoding_rs::ISO_8859_7_INIT,
    &encoding_rs::ISO_8859_8_I_INIT,
    &encoding_rs::ISO_8859_8_INIT,
    &encoding_rs::ISO_8859_10_INIT,
    &encoding_rs::ISO_8859_13_INIT,
    &encoding_rs::ISO_8859_14_INIT,
    &encoding_rs::ISO_8859_15_INIT,
    &encoding_rs::ISO_8859_16_INIT,
    &encoding_rs::KOI8_R_INIT,
    &encoding_rs::KOI8_U_INIT,
    &encoding_rs::MACINTOSH_INIT,
    &encoding_rs::WINDOWS_1250_INIT,
    &encoding_rs::WINDOWS_1251_INIT,
    &encoding_rs::WINDOWS_1252_INIT,
    &encoding_rs::WINDOWS_1253_INIT,
    &encoding_rs::WINDOWS_1254_INIT,
    &encoding_rs::WINDOWS_1255_INIT,
    &encoding_rs::WINDOWS_1256_INIT,
    &encoding_rs::WINDOWS_1257_INIT,
    &encoding_rs::WINDOWS_1258_INIT,
    &encoding_rs::WINDOWS_874_INIT,
    &encoding_rs::X_MAC_CYRILLIC_INIT,
    &encoding_rs::X_USER_DEFINED_INIT,
    // non-ASCII-compatible
    &encoding_rs::REPLACEMENT_INIT,
    &encoding_rs::UTF_16BE_INIT,
    &encoding_rs::UTF_16LE_INIT,
    &encoding_rs::ISO_2022_JP_INIT,
];
51
52fn encoding_to_index(encoding: AsciiCompatibleEncoding) -> usize {
53    let encoding: &'static Encoding = encoding.into();
54
55    ALL_ENCODINGS
56        .iter()
57        .position(|&e| e == encoding)
58        .expect("the ALL_ENCODINGS is not complete and needs to be updated")
59}
60
/// A charset encoding that can be shared and modified.
///
/// This is, for instance, used to adapt the charset dynamically in a [`crate::HtmlRewriter`] if it
/// encounters a `meta` tag that specifies the charset (that behavior is dependent on
/// [`crate::Settings::adjust_charset_on_meta_tag`]).
///
/// Cloning produces another handle to the same underlying value (the state lives
/// behind an [`Arc`]), so a [`SharedEncoding::set`] through one clone is observed
/// by [`SharedEncoding::get`] on every other clone.
// Pub only for integration tests
#[derive(Clone)]
pub struct SharedEncoding {
    // Index of the current encoding within `ALL_ENCODINGS`, shared between clones.
    encoding: Arc<AtomicUsize>,
}
71
72impl SharedEncoding {
73    #[must_use]
74    pub fn new(encoding: AsciiCompatibleEncoding) -> Self {
75        Self {
76            encoding: Arc::new(AtomicUsize::new(encoding_to_index(encoding))),
77        }
78    }
79
80    #[must_use]
81    pub fn get(&self) -> &'static Encoding {
82        let encoding = self.encoding.load(Ordering::Relaxed);
83        // it will never be out of range, but get() avoids a panic branch
84        ALL_ENCODINGS.get(encoding).unwrap_or(&ALL_ENCODINGS[0])
85    }
86
87    pub fn set(&self, encoding: AsciiCompatibleEncoding) {
88        self.encoding
89            .store(encoding_to_index(encoding), Ordering::Relaxed);
90    }
91}
92
#[cfg(test)]
mod tests {
    use super::{SharedEncoding, ALL_ENCODINGS};
    use crate::AsciiCompatibleEncoding;

    /// Every ASCII-compatible entry of the table must survive a `set` followed
    /// by a `get` unchanged.
    #[test]
    fn test_encoding_round_trip() {
        let shared = SharedEncoding::new(AsciiCompatibleEncoding::utf_8());

        // Entries that cannot be wrapped in `AsciiCompatibleEncoding` are skipped.
        let ascii_compatible = ALL_ENCODINGS.iter().filter_map(|&expected| {
            AsciiCompatibleEncoding::new(expected).map(|wrapped| (expected, wrapped))
        });

        for (expected, wrapped) in ascii_compatible {
            shared.set(wrapped);
            assert_eq!(shared.get(), expected);
        }
    }
}