charsets/
lib.rs

1#![cfg_attr(test, deny(missing_docs))]
2#![cfg_attr(test, deny(warnings))]
3
4//! The crate provides an enum representing all charset names used in Media Types
5//! and HTTP header values. The list can be found at [the IANA Character Sets
6//! registry](http://www.iana.org/assignments/character-sets/character-sets.xhtml).
7//!
//! Charset names can be parsed from a string, formatted to a string, and compared.
10//! Unregistered charsets are represented using an `Unregistered` variant.
11
12use std::fmt::{self, Display};
13use std::str::FromStr;
14use std::ascii::AsciiExt;
15
16pub use self::Charset::*;
17
18/// A Mime charset.
19///
20/// The string representation is normalised to upper case.
21///
22/// See http://www.iana.org/assignments/character-sets/character-sets.xhtml
23#[derive(Clone, Debug, Eq, Ord, PartialOrd)]
24pub enum Charset {
25    /// US ASCII
26    UsAscii,
27    /// ISO-8859-1
28    Iso88591,
29    /// ISO-8859-2
30    Iso88592,
31    /// ISO-8859-3
32    Iso88593,
33    /// ISO-8859-4
34    Iso88594,
35    /// ISO-8859-5
36    Iso88595,
37    /// ISO-8859-6
38    Iso88596,
39    /// ISO-8859-7
40    Iso88597,
41    /// ISO-8859-8
42    Iso88598,
43    /// ISO-8859-9
44    Iso88599,
45    /// ISO-8859-10
46    Iso885910,
47    /// Shift_JIS
48    ShiftJis,
49    /// EUC-JP
50    EucJp,
51    /// ISO-2022-KR
52    Iso2022Kr,
53    /// EUC-KR
54    EucKr,
55    /// ISO-2022-JP
56    Iso2022Jp,
57    /// ISO-2022-JP-2
58    Iso2022Jp2,
59    /// ISO-8859-6-E
60    Iso88596E,
61    /// ISO-8859-6-I
62    Iso88596I,
63    /// ISO-8859-8-E
64    Iso88598E,
65    /// ISO-8859-8-I
66    Iso88598I,
67    /// GB2312
68    Gb2312,
69    /// Big5
70    Big5,
71    /// KOI8-R
72    Koi8R,
73    /// UTF-8
74    Utf8,
75    /// An arbitrary charset specified as a string
76    Unregistered(String),
77}
78
79const MAPPING: [(Charset, &'static str); 25] = [(UsAscii, "US-ASCII"),
80 (Iso88591, "ISO-8859-1"),
81 (Iso88592, "ISO-8859-2"),
82 (Iso88593, "ISO-8859-3"),
83 (Iso88594, "ISO-8859-4"),
84 (Iso88595, "ISO-8859-5"),
85 (Iso88596, "ISO-8859-6"),
86 (Iso88597, "ISO-8859-7"),
87 (Iso88598, "ISO-8859-8"),
88 (Iso88599, "ISO-8859-9"),
89 (Iso885910, "ISO-8859-10"),
90 (ShiftJis, "Shift-JIS"),
91 (EucJp, "EUC-JP"),
92 (Iso2022Kr, "ISO-2022-KR"),
93 (EucKr, "EUC-KR"),
94 (Iso2022Jp, "ISO-2022-JP"),
95 (Iso2022Jp2, "ISO-2022-JP-2"),
96 (Iso88596E, "ISO-8859-6-E"),
97 (Iso88596I, "ISO-8859-6-I"),
98 (Iso88598E, "ISO-8859-8-E"),
99 (Iso88598I, "ISO-8859-8-I"),
100 (Gb2312, "GB2312"),
101 (Big5, "5"),
102 (Koi8R, "KOI8-R"),
103 (Utf8, "utf-8")];
104
105impl Charset {
106    fn name(&self) -> &str {
107        if let &Unregistered(ref s) = self {
108            return &s[..];
109        }
110        MAPPING.iter()
111               .find(|&&(ref variant, _)| self == variant)
112               .map(|&(_, name)| name)
113               .unwrap()
114    }
115}
116
117impl Display for Charset {
118    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
119        f.write_str(self.name())
120    }
121}
122
123impl FromStr for Charset {
124    type Err = ();
125    fn from_str(s: &str) -> Result<Charset, ()> {
126        Ok(MAPPING.iter()
127                  .find(|&&(_, ref name)| name.eq_ignore_ascii_case(s))
128                  .map(|&(ref variant, _)| variant.to_owned())
129                  .unwrap_or(Unregistered(s.to_owned())))
130    }
131}
132
133impl PartialEq for Charset {
134    fn eq(&self, other: &Self) -> bool {
135        match (self, other) {
136            (&UsAscii, &UsAscii) |
137            (&Iso88591, &Iso88591) |
138            (&Iso88592, &Iso88592) |
139            (&Iso88593, &Iso88593) |
140            (&Iso88594, &Iso88594) |
141            (&Iso88595, &Iso88595) |
142            (&Iso88596, &Iso88596) |
143            (&Iso88597, &Iso88597) |
144            (&Iso88598, &Iso88598) |
145            (&Iso88599, &Iso88599) |
146            (&Iso885910, &Iso885910) |
147            (&ShiftJis, &ShiftJis) |
148            (&EucJp, &EucJp) |
149            (&Iso2022Kr, &Iso2022Kr) |
150            (&EucKr, &EucKr) |
151            (&Iso2022Jp, &Iso2022Jp) |
152            (&Iso2022Jp2, &Iso2022Jp2) |
153            (&Iso88596E, &Iso88596E) |
154            (&Iso88596I, &Iso88596I) |
155            (&Iso88598E, &Iso88598E) |
156            (&Iso88598I, &Iso88598I) |
157            (&Gb2312, &Gb2312) |
158            (&Big5, &Big5) |
159            (&Koi8R, &Koi8R) |
160            (&Utf8, &Utf8) => true,
161            (&Unregistered(ref s), &Unregistered(ref t)) => s.eq_ignore_ascii_case(t),
162            _ => false,
163        }
164    }
165}
166
#[cfg(test)]
mod tests {
    use super::*;

    /// Registered names parse regardless of ASCII case; an unknown name parses
    /// to `Unregistered`, which itself compares ignoring ASCII case.
    #[test]
    fn test_parse() {
        for spelling in &["us-ascii", "US-Ascii", "US-ASCII"] {
            let parsed: Charset = spelling.parse().unwrap();
            assert_eq!(UsAscii, parsed);
        }

        let shift_jis: Charset = "Shift-JIS".parse().unwrap();
        assert_eq!(ShiftJis, shift_jis);

        let unknown: Charset = "abcd".parse().unwrap();
        assert_eq!(Unregistered("ABCD".to_owned()), unknown);
    }

    /// Formatting yields the table name for a registered charset and the
    /// carried string for an unregistered one.
    #[test]
    fn test_display() {
        let registered = format!("{}", UsAscii);
        assert_eq!("US-ASCII", registered);

        let unregistered = format!("{}", Unregistered("ABCD".to_owned()));
        assert_eq!("ABCD", unregistered);
    }

    /// Equality distinguishes variants and ignores ASCII case for
    /// unregistered names.
    #[test]
    fn test_cmp() {
        let same_variant = Iso88593 == Iso88593;
        assert!(same_variant);

        let different_variant = UsAscii != Iso88593;
        assert!(different_variant);

        let lower = Unregistered("foobar".to_owned());
        let upper = Unregistered("FOOBAR".to_owned());
        assert_eq!(lower, upper);
    }
}