Skip to main content

mail_parser/decoders/charsets/
map.rs

/*
 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
 *
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 */
6
7use super::{
8    multi_byte::*,
9    single_byte::*,
10    utf::{decoder_utf16, decoder_utf16_be, decoder_utf16_le, decoder_utf7},
11    DecoderFnc,
12};
13
14pub fn charset_decoder(charset: &[u8]) -> Option<DecoderFnc> {
15    let mut l_charset = [0u8; 45];
16
17    for (dest, src) in l_charset.iter_mut().zip(charset.iter()) {
18        *dest = match src {
19            b'A'..=b'Z' => *src + 32,
20            b'-' => b'_',
21            _ => *src,
22        };
23    }
24
25    hashify::tiny_map!(&l_charset[..charset.len().clamp(1, 45)],
26        "850" => decoder_ibm_850,
27        "866" => decoder_ibm866,
28        "arabic" => decoder_iso_8859_6,
29        "asmo_708" => decoder_iso_8859_6,
30        "big5" => decoder_big5,
31        "cp819" => decoder_iso_8859_1,
32        "cp850" => decoder_ibm_850,
33        "cp866" => decoder_ibm866,
34        "cp936" => decoder_gbk,
35        "csbig5" => decoder_big5,
36        "cseuckr" => decoder_euc_kr,
37        "cseucpkdfmtjapanese" => decoder_euc_jp,
38        "csgb18030" => decoder_gb18030,
39        "csgbk" => decoder_gbk,
40        "csibm866" => decoder_ibm866,
41        "csiso2022jp" => decoder_iso2022_jp,
42        "csiso885913" => decoder_iso_8859_13,
43        "csiso885914" => decoder_iso_8859_14,
44        "csiso885915" => decoder_iso_8859_15,
45        "csiso885916" => decoder_iso_8859_16,
46        "csisolatin1" => decoder_iso_8859_1,
47        "csisolatin2" => decoder_iso_8859_2,
48        "csisolatin3" => decoder_iso_8859_3,
49        "csisolatin4" => decoder_iso_8859_4,
50        "csisolatin5" => decoder_iso_8859_9,
51        "csisolatin6" => decoder_iso_8859_10,
52        "csisolatinarabic" => decoder_iso_8859_6,
53        "csisolatincyrillic" => decoder_iso_8859_5,
54        "csisolatingreek" => decoder_iso_8859_7,
55        "csisolatinhebrew" => decoder_iso_8859_8,
56        "cskoi8r" => decoder_koi8_r,
57        "cskoi8u" => decoder_koi8_u,
58        "csmacintosh" => decoder_macintosh,
59        "cspc850multilingual" => decoder_ibm_850,
60        "csshiftjis" => decoder_shift_jis,
61        "cstis620" => decoder_tis_620,
62        "csutf16" => decoder_utf16,
63        "csutf16be" => decoder_utf16_be,
64        "csutf16le" => decoder_utf16_le,
65        "csutf7" => decoder_utf7,
66        "cswindows1250" => decoder_cp1250,
67        "cswindows1251" => decoder_cp1251,
68        "cswindows1252" => decoder_cp1252,
69        "cswindows1253" => decoder_cp1253,
70        "cswindows1254" => decoder_cp1254,
71        "cswindows1255" => decoder_cp1255,
72        "cswindows1256" => decoder_cp1256,
73        "cswindows1257" => decoder_cp1257,
74        "cswindows1258" => decoder_cp1258,
75        "cswindows874" => decoder_windows874,
76        "cyrillic" => decoder_iso_8859_5,
77        "ecma_114" => decoder_iso_8859_6,
78        "ecma_118" => decoder_iso_8859_7,
79        "elot_928" => decoder_iso_8859_7,
80        "euc_jp" => decoder_euc_jp,
81        "euc_kr" => decoder_euc_kr,
82        "extended_unix_code_packed_format_for_japanese" => decoder_euc_jp,
83        "gb18030" => decoder_gb18030,
84        "gb2312" => decoder_gb18030,
85        "gbk" => decoder_gbk,
86        "greek" => decoder_iso_8859_7,
87        "greek8" => decoder_iso_8859_7,
88        "hebrew" => decoder_iso_8859_8,
89        "ibm819" => decoder_iso_8859_1,
90        "ibm850" => decoder_ibm_850,
91        "ibm866" => decoder_ibm866,
92        "iso_2022_jp" => decoder_iso2022_jp,
93        "iso_8859_1" => decoder_iso_8859_1,
94        "iso_8859_10" => decoder_iso_8859_10,
95        "iso_8859_10:1992" => decoder_iso_8859_10,
96        "iso_8859_11" => decoder_tis_620,
97        "iso_8859_13" => decoder_iso_8859_13,
98        "iso_8859_14" => decoder_iso_8859_14,
99        "iso_8859_14:1998" => decoder_iso_8859_14,
100        "iso_8859_15" => decoder_iso_8859_15,
101        "iso_8859_16" => decoder_iso_8859_16,
102        "iso_8859_16:2001" => decoder_iso_8859_16,
103        "iso_8859_1:1987" => decoder_iso_8859_1,
104        "iso_8859_2" => decoder_iso_8859_2,
105        "iso_8859_2:1987" => decoder_iso_8859_2,
106        "iso_8859_3" => decoder_iso_8859_3,
107        "iso_8859_3:1988" => decoder_iso_8859_3,
108        "iso_8859_4" => decoder_iso_8859_4,
109        "iso_8859_4:1988" => decoder_iso_8859_4,
110        "iso_8859_5" => decoder_iso_8859_5,
111        "iso_8859_5:1988" => decoder_iso_8859_5,
112        "iso_8859_6" => decoder_iso_8859_6,
113        "iso_8859_6:1987" => decoder_iso_8859_6,
114        "iso_8859_7" => decoder_iso_8859_7,
115        "iso_8859_7:1987" => decoder_iso_8859_7,
116        "iso_8859_8" => decoder_iso_8859_8,
117        "iso_8859_8:1988" => decoder_iso_8859_8,
118        "iso_8859_9" => decoder_iso_8859_9,
119        "iso_8859_9:1989" => decoder_iso_8859_9,
120        "iso_celtic" => decoder_iso_8859_14,
121        "iso_ir_100" => decoder_iso_8859_1,
122        "iso_ir_101" => decoder_iso_8859_2,
123        "iso_ir_109" => decoder_iso_8859_3,
124        "iso_ir_110" => decoder_iso_8859_4,
125        "iso_ir_126" => decoder_iso_8859_7,
126        "iso_ir_127" => decoder_iso_8859_6,
127        "iso_ir_138" => decoder_iso_8859_8,
128        "iso_ir_144" => decoder_iso_8859_5,
129        "iso_ir_148" => decoder_iso_8859_9,
130        "iso_ir_157" => decoder_iso_8859_10,
131        "iso_ir_199" => decoder_iso_8859_14,
132        "iso_ir_226" => decoder_iso_8859_16,
133        "koi8_r" => decoder_koi8_r,
134        "koi8_u" => decoder_koi8_u,
135        "ks_c_5601_1987" => decoder_euc_kr,
136        "ks_c_5601_1989" => decoder_euc_kr,
137        "l1" => decoder_iso_8859_1,
138        "l10" => decoder_iso_8859_16,
139        "l2" => decoder_iso_8859_2,
140        "l3" => decoder_iso_8859_3,
141        "l4" => decoder_iso_8859_4,
142        "l5" => decoder_iso_8859_9,
143        "l6" => decoder_iso_8859_10,
144        "l8" => decoder_iso_8859_14,
145        "latin1" => decoder_iso_8859_1,
146        "latin10" => decoder_iso_8859_16,
147        "latin2" => decoder_iso_8859_2,
148        "latin3" => decoder_iso_8859_3,
149        "latin4" => decoder_iso_8859_4,
150        "latin5" => decoder_iso_8859_9,
151        "latin6" => decoder_iso_8859_10,
152        "latin8" => decoder_iso_8859_14,
153        "latin_9" => decoder_iso_8859_15,
154        "mac" => decoder_macintosh,
155        "macintosh" => decoder_macintosh,
156        "ms936" => decoder_gbk,
157        "ms_kanji" => decoder_shift_jis,
158        "shift_jis" => decoder_shift_jis,
159        "tis_620" => decoder_tis_620,
160        "utf_16" => decoder_utf16,
161        "utf_16be" => decoder_utf16_be,
162        "utf_16le" => decoder_utf16_le,
163        "utf_7" => decoder_utf7,
164        "windows_1250" => decoder_cp1250,
165        "windows_1251" => decoder_cp1251,
166        "windows_1252" => decoder_cp1252,
167        "windows_1253" => decoder_cp1253,
168        "windows_1254" => decoder_cp1254,
169        "windows_1255" => decoder_cp1255,
170        "windows_1256" => decoder_cp1256,
171        "windows_1257" => decoder_cp1257,
172        "windows_1258" => decoder_cp1258,
173        "windows_874" => decoder_windows874,
174        "windows_936" => decoder_gbk,
175    )
176}
177
#[cfg(test)]
mod tests {
    use super::charset_decoder;

    /// Labels spelled exactly as they appear in the lookup table resolve to a
    /// decoder; the second label is the longest key in the table.
    #[test]
    fn decoder_charset() {
        for input in ["gbk", "extended_unix_code_packed_format_for_japanese"] {
            assert!(
                charset_decoder(input.as_bytes()).is_some(),
                "Failed for {input}",
            );
        }
    }

    /// Uppercase letters and hyphens are normalised before the lookup.
    #[test]
    fn decoder_charset_normalizes_case_and_hyphens() {
        for input in ["GBK", "ISO-8859-1", "Windows-1252", "UTF-7", "Shift_JIS"] {
            assert!(
                charset_decoder(input.as_bytes()).is_some(),
                "Failed for {input}",
            );
        }
    }

    /// Labels with no table entry, including the empty label, yield `None`.
    #[test]
    fn decoder_charset_rejects_unknown_labels() {
        for input in ["", "x-unknown", "utf-9"] {
            assert!(
                charset_decoder(input.as_bytes()).is_none(),
                "Unexpected decoder for {input:?}",
            );
        }
    }
}