locale_codes/
script.rs

/*!
Codes for the representation of names of scripts.

ISO 15924, Codes for the representation of names of scripts, defines two sets of
codes for a number of writing systems (scripts). Each script is given both a
four-letter code and a numeric one. A script is defined as a "set of graphic characters
used for the written form of one or more languages".

Where possible the codes are derived from ISO 639-2 where the name of a script
and the name of a language using the script are identical (example: Gujarātī ISO 639
`guj`, ISO 15924 `Gujr`). Preference is given to the 639-2 Bibliographical codes, which
differs from the otherwise often preferred use of the Terminological codes.

The 4-letter ISO 15924 codes are incorporated into the Language Subtag Registry for
IETF language tags and so can be used in file formats that make use of such language
tags. For example, they can be used in HTML and XML to help Web browsers determine which
typeface to use for foreign text. This way one could differentiate, for example,
between Serbian written in the Cyrillic (`sr-Cyrl`) or Latin (`sr-Latn`) script, or mark
romanized text as such.

ISO appointed the Unicode Consortium as the Registration Authority (RA) for the standard.

## Source - ISO 15924

The data used here is taken from
[ISO](https://www.unicode.org/iso15924/iso15924-codes.html).

*/
29
30use std::collections::HashMap;
31
32use serde::{Deserialize, Serialize};
33
34// ------------------------------------------------------------------------------------------------
35// Public Types
36// ------------------------------------------------------------------------------------------------
37
38/// A representation of registered script data maintained by ISO.
39#[derive(Serialize, Deserialize, Debug)]
40pub struct ScriptInfo {
41    /// The standard 3-character identifier for this script.
42    pub alphabetic_code: String,
43    /// The standard numeric identifier for this script.
44    pub numeric_code: u16,
45    /// The script name, in English.
46    pub name: String,
47    /// An optional alias for this script.
48    pub alias: Option<String>,
49}
50
51// ------------------------------------------------------------------------------------------------
52// Public Functions
53// ------------------------------------------------------------------------------------------------
54
55lazy_static! {
56    static ref SCRIPTS: HashMap<String, ScriptInfo> = load_scripts_from_json();
57    static ref NUMERIC_LOOKUP: HashMap<u16, String> = make_script_lookup();
58}
59
60pub fn lookup_by_alpha(alphabetic_code: &str) -> Option<&'static ScriptInfo> {
61    assert_eq!(
62        alphabetic_code.len(),
63        4,
64        "script code is expected to be 3 characters"
65    );
66    SCRIPTS.get(alphabetic_code)
67}
68
69pub fn lookup_by_numeric(numeric_code: &u16) -> Option<&'static ScriptInfo> {
70    match NUMERIC_LOOKUP.get(&numeric_code) {
71        Some(v) => lookup_by_alpha(v),
72        None => None,
73    }
74}
75
76pub fn all_alpha_codes() -> Vec<String> {
77    SCRIPTS.keys().cloned().collect()
78}
79
80pub fn all_numeric_codes() -> Vec<u16> {
81    NUMERIC_LOOKUP.keys().cloned().collect()
82}
83
84// ------------------------------------------------------------------------------------------------
85// Generated Data
86// ------------------------------------------------------------------------------------------------
87
88fn load_scripts_from_json() -> HashMap<String, ScriptInfo> {
89    info!("scripts_from_json - loading JSON");
90    let raw_data = include_bytes!("data/scripts.json");
91    let script_map: HashMap<String, ScriptInfo> = serde_json::from_slice(raw_data).unwrap();
92    info!("scripts_from_json - loaded {} codesets", script_map.len());
93    script_map
94}
95
96fn make_script_lookup() -> HashMap<u16, String> {
97    info!("load_script_lookup - create from SCRIPTS");
98    let mut lookup_map: HashMap<u16, String> = HashMap::new();
99    for script in SCRIPTS.values() {
100        debug!("{} -> {}", &script.numeric_code, &script.alphabetic_code);
101        lookup_map.insert(script.numeric_code, script.alphabetic_code.to_string());
102    }
103    info!("load_script_lookup - mapped {} countries", lookup_map.len());
104    lookup_map
105}
106
107// ------------------------------------------------------------------------------------------------
108// Unit Tests
109// ------------------------------------------------------------------------------------------------
110
#[cfg(test)]
mod tests {
    use super::*;

    // --------------------------------------------------------------------------------------------
    /// A known alphabetic code resolves to the matching record.
    #[test]
    fn test_good_script_alpha_code() {
        let script = lookup_by_alpha("Hluw").expect("was expecting a script");
        assert_eq!(script.alphabetic_code, "Hluw");
        assert_eq!(script.numeric_code, 80);
        // The alias field is not checked here (a comparison against
        // "Anatolian_Hieroglyphs" was previously commented out).
    }

    /// A well-formed but unregistered alphabetic code yields `None`.
    #[test]
    fn test_bad_script_alpha_code() {
        assert!(
            lookup_by_alpha("UTF8").is_none(),
            "was expecting a None in response"
        );
    }

    /// A known numeric code resolves to the matching record.
    #[test]
    fn test_good_script_numeric_code() {
        let script = lookup_by_numeric(&80).expect("was expecting a script");
        assert_eq!(script.alphabetic_code, "Hluw");
        assert_eq!(script.numeric_code, 80);
        // The alias field is not checked here (a comparison against
        // "Anatolian_Hieroglyphs" was previously commented out).
    }

    /// An unregistered numeric code yields `None`.
    #[test]
    fn test_bad_script_numeric_code() {
        assert!(
            lookup_by_numeric(&0).is_none(),
            "was expecting a None in response"
        );
    }

    /// Both code listings are populated.
    #[test]
    fn test_script_codes() {
        assert!(!all_alpha_codes().is_empty());
        assert!(!all_numeric_codes().is_empty());
    }
}