1use super::{
8 multi_byte::*,
9 single_byte::*,
10 utf::{decoder_utf16, decoder_utf16_be, decoder_utf16_le, decoder_utf7},
11 DecoderFnc,
12};
13
14pub fn charset_decoder(charset: &[u8]) -> Option<DecoderFnc> {
15 let mut l_charset = [0u8; 45];
16
17 for (dest, src) in l_charset.iter_mut().zip(charset.iter()) {
18 *dest = match src {
19 b'A'..=b'Z' => *src + 32,
20 b'-' => b'_',
21 _ => *src,
22 };
23 }
24
25 hashify::tiny_map!(&l_charset[..charset.len().clamp(1, 45)],
26 "850" => decoder_ibm_850,
27 "866" => decoder_ibm866,
28 "arabic" => decoder_iso_8859_6,
29 "asmo_708" => decoder_iso_8859_6,
30 "big5" => decoder_big5,
31 "cp819" => decoder_iso_8859_1,
32 "cp850" => decoder_ibm_850,
33 "cp866" => decoder_ibm866,
34 "cp936" => decoder_gbk,
35 "csbig5" => decoder_big5,
36 "cseuckr" => decoder_euc_kr,
37 "cseucpkdfmtjapanese" => decoder_euc_jp,
38 "csgb18030" => decoder_gb18030,
39 "csgbk" => decoder_gbk,
40 "csibm866" => decoder_ibm866,
41 "csiso2022jp" => decoder_iso2022_jp,
42 "csiso885913" => decoder_iso_8859_13,
43 "csiso885914" => decoder_iso_8859_14,
44 "csiso885915" => decoder_iso_8859_15,
45 "csiso885916" => decoder_iso_8859_16,
46 "csisolatin1" => decoder_iso_8859_1,
47 "csisolatin2" => decoder_iso_8859_2,
48 "csisolatin3" => decoder_iso_8859_3,
49 "csisolatin4" => decoder_iso_8859_4,
50 "csisolatin5" => decoder_iso_8859_9,
51 "csisolatin6" => decoder_iso_8859_10,
52 "csisolatinarabic" => decoder_iso_8859_6,
53 "csisolatincyrillic" => decoder_iso_8859_5,
54 "csisolatingreek" => decoder_iso_8859_7,
55 "csisolatinhebrew" => decoder_iso_8859_8,
56 "cskoi8r" => decoder_koi8_r,
57 "cskoi8u" => decoder_koi8_u,
58 "csmacintosh" => decoder_macintosh,
59 "cspc850multilingual" => decoder_ibm_850,
60 "csshiftjis" => decoder_shift_jis,
61 "cstis620" => decoder_tis_620,
62 "csutf16" => decoder_utf16,
63 "csutf16be" => decoder_utf16_be,
64 "csutf16le" => decoder_utf16_le,
65 "csutf7" => decoder_utf7,
66 "cswindows1250" => decoder_cp1250,
67 "cswindows1251" => decoder_cp1251,
68 "cswindows1252" => decoder_cp1252,
69 "cswindows1253" => decoder_cp1253,
70 "cswindows1254" => decoder_cp1254,
71 "cswindows1255" => decoder_cp1255,
72 "cswindows1256" => decoder_cp1256,
73 "cswindows1257" => decoder_cp1257,
74 "cswindows1258" => decoder_cp1258,
75 "cswindows874" => decoder_windows874,
76 "cyrillic" => decoder_iso_8859_5,
77 "ecma_114" => decoder_iso_8859_6,
78 "ecma_118" => decoder_iso_8859_7,
79 "elot_928" => decoder_iso_8859_7,
80 "euc_jp" => decoder_euc_jp,
81 "euc_kr" => decoder_euc_kr,
82 "extended_unix_code_packed_format_for_japanese" => decoder_euc_jp,
83 "gb18030" => decoder_gb18030,
84 "gb2312" => decoder_gb18030,
85 "gbk" => decoder_gbk,
86 "greek" => decoder_iso_8859_7,
87 "greek8" => decoder_iso_8859_7,
88 "hebrew" => decoder_iso_8859_8,
89 "ibm819" => decoder_iso_8859_1,
90 "ibm850" => decoder_ibm_850,
91 "ibm866" => decoder_ibm866,
92 "iso_2022_jp" => decoder_iso2022_jp,
93 "iso_8859_1" => decoder_iso_8859_1,
94 "iso_8859_10" => decoder_iso_8859_10,
95 "iso_8859_10:1992" => decoder_iso_8859_10,
96 "iso_8859_11" => decoder_tis_620,
97 "iso_8859_13" => decoder_iso_8859_13,
98 "iso_8859_14" => decoder_iso_8859_14,
99 "iso_8859_14:1998" => decoder_iso_8859_14,
100 "iso_8859_15" => decoder_iso_8859_15,
101 "iso_8859_16" => decoder_iso_8859_16,
102 "iso_8859_16:2001" => decoder_iso_8859_16,
103 "iso_8859_1:1987" => decoder_iso_8859_1,
104 "iso_8859_2" => decoder_iso_8859_2,
105 "iso_8859_2:1987" => decoder_iso_8859_2,
106 "iso_8859_3" => decoder_iso_8859_3,
107 "iso_8859_3:1988" => decoder_iso_8859_3,
108 "iso_8859_4" => decoder_iso_8859_4,
109 "iso_8859_4:1988" => decoder_iso_8859_4,
110 "iso_8859_5" => decoder_iso_8859_5,
111 "iso_8859_5:1988" => decoder_iso_8859_5,
112 "iso_8859_6" => decoder_iso_8859_6,
113 "iso_8859_6:1987" => decoder_iso_8859_6,
114 "iso_8859_7" => decoder_iso_8859_7,
115 "iso_8859_7:1987" => decoder_iso_8859_7,
116 "iso_8859_8" => decoder_iso_8859_8,
117 "iso_8859_8:1988" => decoder_iso_8859_8,
118 "iso_8859_9" => decoder_iso_8859_9,
119 "iso_8859_9:1989" => decoder_iso_8859_9,
120 "iso_celtic" => decoder_iso_8859_14,
121 "iso_ir_100" => decoder_iso_8859_1,
122 "iso_ir_101" => decoder_iso_8859_2,
123 "iso_ir_109" => decoder_iso_8859_3,
124 "iso_ir_110" => decoder_iso_8859_4,
125 "iso_ir_126" => decoder_iso_8859_7,
126 "iso_ir_127" => decoder_iso_8859_6,
127 "iso_ir_138" => decoder_iso_8859_8,
128 "iso_ir_144" => decoder_iso_8859_5,
129 "iso_ir_148" => decoder_iso_8859_9,
130 "iso_ir_157" => decoder_iso_8859_10,
131 "iso_ir_199" => decoder_iso_8859_14,
132 "iso_ir_226" => decoder_iso_8859_16,
133 "koi8_r" => decoder_koi8_r,
134 "koi8_u" => decoder_koi8_u,
135 "ks_c_5601_1987" => decoder_euc_kr,
136 "ks_c_5601_1989" => decoder_euc_kr,
137 "l1" => decoder_iso_8859_1,
138 "l10" => decoder_iso_8859_16,
139 "l2" => decoder_iso_8859_2,
140 "l3" => decoder_iso_8859_3,
141 "l4" => decoder_iso_8859_4,
142 "l5" => decoder_iso_8859_9,
143 "l6" => decoder_iso_8859_10,
144 "l8" => decoder_iso_8859_14,
145 "latin1" => decoder_iso_8859_1,
146 "latin10" => decoder_iso_8859_16,
147 "latin2" => decoder_iso_8859_2,
148 "latin3" => decoder_iso_8859_3,
149 "latin4" => decoder_iso_8859_4,
150 "latin5" => decoder_iso_8859_9,
151 "latin6" => decoder_iso_8859_10,
152 "latin8" => decoder_iso_8859_14,
153 "latin_9" => decoder_iso_8859_15,
154 "mac" => decoder_macintosh,
155 "macintosh" => decoder_macintosh,
156 "ms936" => decoder_gbk,
157 "ms_kanji" => decoder_shift_jis,
158 "shift_jis" => decoder_shift_jis,
159 "tis_620" => decoder_tis_620,
160 "utf_16" => decoder_utf16,
161 "utf_16be" => decoder_utf16_be,
162 "utf_16le" => decoder_utf16_le,
163 "utf_7" => decoder_utf7,
164 "windows_1250" => decoder_cp1250,
165 "windows_1251" => decoder_cp1251,
166 "windows_1252" => decoder_cp1252,
167 "windows_1253" => decoder_cp1253,
168 "windows_1254" => decoder_cp1254,
169 "windows_1255" => decoder_cp1255,
170 "windows_1256" => decoder_cp1256,
171 "windows_1257" => decoder_cp1257,
172 "windows_1258" => decoder_cp1258,
173 "windows_874" => decoder_windows874,
174 "windows_936" => decoder_gbk,
175 )
176}
177
#[cfg(test)]
mod tests {
    use super::charset_decoder;

    /// Each non-empty sample label must resolve to some decoder.
    #[test]
    fn decoder_charset() {
        const LABELS: [&str; 2] = ["gbk", "extended_unix_code_packed_format_for_japanese"];

        LABELS
            .iter()
            .copied()
            .filter(|label| !label.is_empty())
            .for_each(|input| {
                let decoder = charset_decoder(input.as_bytes());
                assert!(decoder.is_some(), "Failed for {input}",);
            });
    }
}