1use std::borrow::Cow;
2
3use charset::Charset as EncodingCharset;
4use encoding_rs::Encoding;
5
6lazy_static::lazy_static! {
7 static ref UTF7: EncodingCharset = EncodingCharset::for_label(b"UTF-7").unwrap();
8}
9
10#[derive(Debug, Clone, Copy, PartialEq)]
18pub enum Charset {
19 Ascii,
20 Utf7,
21 Unknown8Bit,
22 Encoding(&'static Encoding),
23}
24
25impl Default for Charset {
26 fn default() -> Self {
27 Charset::Ascii
28 }
29}
30
31impl From<&'static Encoding> for Charset {
32 fn from(enc: &'static Encoding) -> Self {
33 Charset::Encoding(enc)
34 }
35}
36
37impl Charset {
38 pub fn name(&self) -> &'static str {
39 match self {
40 Charset::Ascii => "us-ascii",
41 Charset::Utf7 => "utf-7",
42 Charset::Unknown8Bit => "unknown-8bit",
43 Charset::Encoding(encoding) => encoding.name(),
44 }
45 }
46
47 pub fn for_label(label: &[u8]) -> Option<Self> {
48 if let Ok(enc) = std::str::from_utf8(label) {
49 let enc = enc.to_lowercase();
50 if enc == "us-ascii" {
51 return Some(Charset::Ascii);
52 }
53
54 if enc == "utf-7" {
55 return Some(Charset::Utf7);
56 }
57 if enc == "unknown-8bit" {
58 return Some(Charset::Unknown8Bit);
59 }
60 }
61
62 if let Some(enc) = Encoding::for_label(label) {
63 return Some(Charset::Encoding(enc));
64 }
65
66 None
67 }
68
69 pub fn encode(self, input: &str) -> (Cow<[u8]>, bool) {
72 match self {
73 Charset::Ascii => {
74 let (out, _, errors) = encoding_rs::WINDOWS_1252.encode(input);
75 (out, errors)
76 }
77 Charset::Utf7 | Charset::Unknown8Bit => (Cow::Borrowed(input.as_bytes()), false),
78 Charset::Encoding(encoding) => {
79 let (out, _, errors) = encoding.encode(input);
80 (out, errors)
81 }
82 }
83 }
84
85 pub fn decode_without_bom_handling(self, bytes: &[u8]) -> (Cow<str>, bool) {
86 match self {
87 Charset::Utf7 => UTF7.decode_without_bom_handling(bytes),
88 Charset::Unknown8Bit | Charset::Ascii => {
89 encoding_rs::WINDOWS_1252.decode_without_bom_handling(bytes)
90 }
91 Charset::Encoding(encoding) => encoding.decode_without_bom_handling(bytes),
92 }
93 }
94
95 pub fn get_output_charset(self) -> Charset {
97 match self {
98 Charset::Ascii | Charset::Utf7 | Charset::Unknown8Bit => Charset::default(),
99 Charset::Encoding(encoding) => Charset::Encoding(encoding.output_encoding()),
100 }
101 }
102}