locale_codes/
codeset.rs

/*!
Character sets registered with IANA.

These are the official names for character sets that may be used in
the Internet and may be referred to in Internet documentation.  These
names are expressed in ANSI_X3.4-1968 which is commonly called
US-ASCII or simply ASCII.  The character set most commonly used in the
Internet and used especially in protocol standards is US-ASCII, this
is strongly encouraged.  The use of the name US-ASCII is also
encouraged.

## Source - IANA

The data used here is taken from the tables in the html page
[IANA](https://www.iana.org/assignments/character-sets/character-sets.xhtml).

See also: [RFC-2978](https://tools.ietf.org/html/rfc2978) IANA Charset
Registration Procedures.
*/
20
21use std::collections::HashMap;
22
23use serde::{Deserialize, Serialize};
24
25// ------------------------------------------------------------------------------------------------
26// Public Types
27// ------------------------------------------------------------------------------------------------
28
29/// A representation of registrered character set data that maintained by IANA.
30#[derive(Serialize, Deserialize, Debug)]
31pub struct CodesetInfo {
32    /// The name, not a code, for this code set.
33    pub name: String,
34    /// Any well known aliases for this code set.
35    pub also_known_as: Vec<String>,
36    /// The IANA registered MIB code.
37    pub mib_code: u32,
38    /// Sources identified in the IANA registration.
39    pub source: Option<String>,
40    /// References identified in the IANA registration.
41    pub references: Option<String>,
42}
43
44// ------------------------------------------------------------------------------------------------
45// Public Functions
46// ------------------------------------------------------------------------------------------------
47
48lazy_static! {
49    static ref CODESETS: HashMap<String, CodesetInfo> = load_code_sets_from_json();
50}
51
52/// Lookup a `CodesetInfo` based on it's name, returning `None` if the name
53/// does not exist in the current IANA data set.
54pub fn lookup(name: &str) -> Option<&'static CodesetInfo> {
55    assert!(name.len() > 0, "codeset name may not be empty");
56    CODESETS.get(name)
57}
58
59/// Return all the registered script names.
60pub fn all_names() -> Vec<String> {
61    CODESETS.keys().cloned().collect()
62}
63
64// ------------------------------------------------------------------------------------------------
65// Generated Data
66// ------------------------------------------------------------------------------------------------
67
68fn load_code_sets_from_json() -> HashMap<String, CodesetInfo> {
69    info!("load_code_sets_from_json - loading JSON");
70    let raw_data = include_bytes!("data/codesets.json");
71    let code_set_map: HashMap<String, CodesetInfo> = serde_json::from_slice(raw_data).unwrap();
72    info!(
73        "load_code_sets_from_json - loaded {} codes ets",
74        code_set_map.len()
75    );
76    code_set_map
77}
78
79// ------------------------------------------------------------------------------------------------
80// Unit Tests
81// ------------------------------------------------------------------------------------------------
82
#[cfg(test)]
mod tests {
    use super::*;

    // --------------------------------------------------------------------------------------------
    /// A name present in the data resolves to a record carrying that same name.
    #[test]
    fn test_good_codeset_code() {
        let found = lookup("UTF-8").expect("was expecting a codeset");
        assert_eq!(found.name, "UTF-8");
    }

    /// A name absent from the data yields `None`.
    #[test]
    fn test_bad_codeset_code() {
        let missing = lookup("UTF-99");
        assert!(missing.is_none(), "was expecting a None in response");
    }
}
103}