1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
use std::borrow::Cow;
use charset::Charset as EncodingCharset;
use encoding_rs::Encoding;
lazy_static::lazy_static! {
    /// UTF-7 charset handle from the `charset` crate, resolved once on first use.
    ///
    /// `encoding_rs` has no UTF-7 support, so UTF-7 decoding goes through this
    /// handle instead of an `encoding_rs::Encoding`.
    static ref UTF7: EncodingCharset = EncodingCharset::for_label(b"UTF-7")
        .expect("the `charset` crate must recognize the \"UTF-7\" label");
}
/// A character set identified by its label.
///
/// Three labels get dedicated variants; every other supported label is
/// represented by the matching `encoding_rs` encoding.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the type can be used as
/// a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Charset {
    /// The `us-ascii` charset.
    Ascii,
    /// The `utf-7` charset.
    Utf7,
    /// The `unknown-8bit` charset.
    Unknown8Bit,
    /// Any encoding provided by `encoding_rs`.
    Encoding(&'static Encoding),
}
impl Default for Charset {
fn default() -> Self {
Charset::Ascii
}
}
impl From<&'static Encoding> for Charset {
fn from(enc: &'static Encoding) -> Self {
Charset::Encoding(enc)
}
}
impl Charset {
    /// Returns the name of this charset.
    ///
    /// The three dedicated variants map to fixed lowercase names; for
    /// `Charset::Encoding` the name reported by `encoding_rs` is returned.
    pub fn name(&self) -> &'static str {
        match self {
            Charset::Ascii => "us-ascii",
            Charset::Utf7 => "utf-7",
            Charset::Unknown8Bit => "unknown-8bit",
            Charset::Encoding(encoding) => encoding.name(),
        }
    }

    /// Looks up a charset by its label.
    ///
    /// `us-ascii`, `utf-7` and `unknown-8bit` are matched first (ASCII
    /// case-insensitively, ignoring surrounding ASCII whitespace) and map to
    /// their dedicated variants. Any other label is delegated to
    /// `Encoding::for_label`. Returns `None` when the label is not recognized.
    pub fn for_label(label: &[u8]) -> Option<Self> {
        /// Strips leading and trailing ASCII whitespace from `bytes`.
        fn trim_ascii_whitespace(bytes: &[u8]) -> &[u8] {
            let start = bytes
                .iter()
                .position(|b| !b.is_ascii_whitespace())
                .unwrap_or(bytes.len());
            let end = bytes
                .iter()
                .rposition(|b| !b.is_ascii_whitespace())
                .map_or(start, |last| last + 1);
            &bytes[start..end]
        }

        // Trim so the special labels tolerate the same surrounding whitespace
        // that `Encoding::for_label` accepts.
        let trimmed = trim_ascii_whitespace(label);

        // Compare bytes ASCII case-insensitively: no allocation, no UTF-8
        // validation, and no Unicode case mapping that could let non-ASCII
        // look-alike characters match an ASCII label.
        //
        // These checks must run before the `encoding_rs` lookup, which would
        // otherwise resolve `us-ascii` to one of its own encodings.
        if trimmed.eq_ignore_ascii_case(b"us-ascii") {
            Some(Charset::Ascii)
        } else if trimmed.eq_ignore_ascii_case(b"utf-7") {
            Some(Charset::Utf7)
        } else if trimmed.eq_ignore_ascii_case(b"unknown-8bit") {
            Some(Charset::Unknown8Bit)
        } else {
            Encoding::for_label(label).map(Charset::Encoding)
        }
    }

    /// Encodes `input` into bytes for this charset.
    ///
    /// Returns the encoded bytes together with the error flag reported by the
    /// underlying encoder.
    ///
    /// * `Ascii` is encoded with the windows-1252 encoder, so characters that
    ///   windows-1252 can represent are emitted as single bytes even when they
    ///   are outside the 7-bit ASCII range.
    /// * `Utf7` and `Unknown8Bit` return the bytes of `input` unchanged and
    ///   never report an error; no UTF-7 encoding is performed.
    /// * `Encoding` delegates to the wrapped `encoding_rs` encoding.
    pub fn encode(self, input: &str) -> (Cow<[u8]>, bool) {
        match self {
            Charset::Ascii => {
                let (out, _, errors) = encoding_rs::WINDOWS_1252.encode(input);
                (out, errors)
            }
            Charset::Utf7 | Charset::Unknown8Bit => (Cow::Borrowed(input.as_bytes()), false),
            Charset::Encoding(encoding) => {
                let (out, _, errors) = encoding.encode(input);
                (out, errors)
            }
        }
    }

    /// Decodes `bytes` from this charset into a string without BOM handling.
    ///
    /// Returns the decoded text together with the error flag reported by the
    /// underlying decoder.
    ///
    /// * `Utf7` is decoded by the `charset` crate's UTF-7 decoder.
    /// * `Ascii` and `Unknown8Bit` are decoded as windows-1252.
    /// * `Encoding` delegates to the wrapped `encoding_rs` encoding.
    pub fn decode_without_bom_handling(self, bytes: &[u8]) -> (Cow<str>, bool) {
        match self {
            Charset::Utf7 => UTF7.decode_without_bom_handling(bytes),
            Charset::Unknown8Bit | Charset::Ascii => {
                encoding_rs::WINDOWS_1252.decode_without_bom_handling(bytes)
            }
            Charset::Encoding(encoding) => encoding.decode_without_bom_handling(bytes),
        }
    }

    /// Returns the charset to use when producing output for this charset.
    ///
    /// `Ascii`, `Utf7` and `Unknown8Bit` all map to the default charset; an
    /// `Encoding` maps to the output encoding reported by `encoding_rs`.
    pub fn get_output_charset(self) -> Charset {
        match self {
            Charset::Ascii | Charset::Utf7 | Charset::Unknown8Bit => Charset::default(),
            Charset::Encoding(encoding) => Charset::Encoding(encoding.output_encoding()),
        }
    }
}