encoded_words/
charset.rs

1use std::borrow::Cow;
2
3use charset::Charset as EncodingCharset;
4use encoding_rs::Encoding;
5
6lazy_static::lazy_static! {
7    static ref UTF7: EncodingCharset = EncodingCharset::for_label(b"UTF-7").unwrap();
8}
9
10/// Map character sets to their email properties.
11///
12/// Provides information about the requirements imposed on email
13/// for a specific character set.
14/// Certain character sets must be encoded with quoted-printable or base64
15/// when used in email headers or bodies.  Certain character sets must be
16/// converted outright, and are not allowed in email.
17#[derive(Debug, Clone, Copy, PartialEq)]
18pub enum Charset {
19    Ascii,
20    Utf7,
21    Unknown8Bit,
22    Encoding(&'static Encoding),
23}
24
25impl Default for Charset {
26    fn default() -> Self {
27        Charset::Ascii
28    }
29}
30
31impl From<&'static Encoding> for Charset {
32    fn from(enc: &'static Encoding) -> Self {
33        Charset::Encoding(enc)
34    }
35}
36
37impl Charset {
38    pub fn name(&self) -> &'static str {
39        match self {
40            Charset::Ascii => "us-ascii",
41            Charset::Utf7 => "utf-7",
42            Charset::Unknown8Bit => "unknown-8bit",
43            Charset::Encoding(encoding) => encoding.name(),
44        }
45    }
46
47    pub fn for_label(label: &[u8]) -> Option<Self> {
48        if let Ok(enc) = std::str::from_utf8(label) {
49            let enc = enc.to_lowercase();
50            if enc == "us-ascii" {
51                return Some(Charset::Ascii);
52            }
53
54            if enc == "utf-7" {
55                return Some(Charset::Utf7);
56            }
57            if enc == "unknown-8bit" {
58                return Some(Charset::Unknown8Bit);
59            }
60        }
61
62        if let Some(enc) = Encoding::for_label(label) {
63            return Some(Charset::Encoding(enc));
64        }
65
66        None
67    }
68
69    // Convenience function for encoding strings, taking into account
70    // that they might be unknown-8bit (ie: have surrogate-escaped bytes)
71    pub fn encode(self, input: &str) -> (Cow<[u8]>, bool) {
72        match self {
73            Charset::Ascii => {
74                let (out, _, errors) = encoding_rs::WINDOWS_1252.encode(input);
75                (out, errors)
76            }
77            Charset::Utf7 | Charset::Unknown8Bit => (Cow::Borrowed(input.as_bytes()), false),
78            Charset::Encoding(encoding) => {
79                let (out, _, errors) = encoding.encode(input);
80                (out, errors)
81            }
82        }
83    }
84
85    pub fn decode_without_bom_handling(self, bytes: &[u8]) -> (Cow<str>, bool) {
86        match self {
87            Charset::Utf7 => UTF7.decode_without_bom_handling(bytes),
88            Charset::Unknown8Bit | Charset::Ascii => {
89                encoding_rs::WINDOWS_1252.decode_without_bom_handling(bytes)
90            }
91            Charset::Encoding(encoding) => encoding.decode_without_bom_handling(bytes),
92        }
93    }
94
95    /// Return the output character set.
96    pub fn get_output_charset(self) -> Charset {
97        match self {
98            Charset::Ascii | Charset::Utf7 | Charset::Unknown8Bit => Charset::default(),
99            Charset::Encoding(encoding) => Charset::Encoding(encoding.output_encoding()),
100        }
101    }
102}