locale_codes/script.rs
1/*!
2Codes for the representation of names of scripts
3
4ISO 15924, Codes for the representation of names of scripts, defines two sets of
5codes for a number of writing systems (scripts). Each script is given both a
6four-letter code and a numeric one. Script is defined as "set of graphic characters
7used for the written form of one or more languages".
8
9Where possible the codes are derived from ISO 639-2 where the name of a script
10and the name of a language using the script are identical (example: Gujarātī ISO 639
11guj, ISO 15924 Gujr). Preference is given to the 639-2 Bibliographical codes, which
12is different from the otherwise often preferred use of the Terminological codes.
13
144-letter ISO 15924 codes are incorporated into the Language Subtag Registry for
15IETF language tags and so can be used in file formats that make use of such language
16tags. For example, they can be used in HTML and XML to help Web browsers determine which
17typeface to use for foreign text. This way one could differentiate, for example,
18between Serbian written in the Cyrillic (sr-Cyrl) or Latin (sr-Latn) script, or mark
19romanized text as such.
20
21ISO appointed the Unicode Consortium as the Registration Authority (RA) for the standard.
22
23## Source - ISO 15924
24
25The data used here is taken from the Unicode Consortium's
26[ISO 15924 code list](https://www.unicode.org/iso15924/iso15924-codes.html).
27
28*/
29
30use std::collections::HashMap;
31
32use serde::{Deserialize, Serialize};
33
34// ------------------------------------------------------------------------------------------------
35// Public Types
36// ------------------------------------------------------------------------------------------------
37
38/// A representation of registered script data maintained by ISO.
39#[derive(Serialize, Deserialize, Debug)]
40pub struct ScriptInfo {
41 /// The standard 3-character identifier for this script.
42 pub alphabetic_code: String,
43 /// The standard numeric identifier for this script.
44 pub numeric_code: u16,
45 /// The script name, in English.
46 pub name: String,
47 /// An optional alias for this script.
48 pub alias: Option<String>,
49}
50
51// ------------------------------------------------------------------------------------------------
52// Public Functions
53// ------------------------------------------------------------------------------------------------
54
55lazy_static! {
 // Table of all scripts produced by `load_scripts_from_json`; `lookup_by_alpha`
 // queries it directly with the alphabetic code it is given.
56 static ref SCRIPTS: HashMap<String, ScriptInfo> = load_scripts_from_json();
 // Index from each script's numeric code to its alphabetic code, built from
 // the values of `SCRIPTS` by `make_script_lookup`.
57 static ref NUMERIC_LOOKUP: HashMap<u16, String> = make_script_lookup();
58}
59
60pub fn lookup_by_alpha(alphabetic_code: &str) -> Option<&'static ScriptInfo> {
61 assert_eq!(
62 alphabetic_code.len(),
63 4,
64 "script code is expected to be 3 characters"
65 );
66 SCRIPTS.get(alphabetic_code)
67}
68
69pub fn lookup_by_numeric(numeric_code: &u16) -> Option<&'static ScriptInfo> {
70 match NUMERIC_LOOKUP.get(&numeric_code) {
71 Some(v) => lookup_by_alpha(v),
72 None => None,
73 }
74}
75
76pub fn all_alpha_codes() -> Vec<String> {
77 SCRIPTS.keys().cloned().collect()
78}
79
80pub fn all_numeric_codes() -> Vec<u16> {
81 NUMERIC_LOOKUP.keys().cloned().collect()
82}
83
84// ------------------------------------------------------------------------------------------------
85// Generated Data
86// ------------------------------------------------------------------------------------------------
87
88fn load_scripts_from_json() -> HashMap<String, ScriptInfo> {
89 info!("scripts_from_json - loading JSON");
90 let raw_data = include_bytes!("data/scripts.json");
91 let script_map: HashMap<String, ScriptInfo> = serde_json::from_slice(raw_data).unwrap();
92 info!("scripts_from_json - loaded {} codesets", script_map.len());
93 script_map
94}
95
96fn make_script_lookup() -> HashMap<u16, String> {
97 info!("load_script_lookup - create from SCRIPTS");
98 let mut lookup_map: HashMap<u16, String> = HashMap::new();
99 for script in SCRIPTS.values() {
100 debug!("{} -> {}", &script.numeric_code, &script.alphabetic_code);
101 lookup_map.insert(script.numeric_code, script.alphabetic_code.to_string());
102 }
103 info!("load_script_lookup - mapped {} countries", lookup_map.len());
104 lookup_map
105}
106
107// ------------------------------------------------------------------------------------------------
108// Unit Tests
109// ------------------------------------------------------------------------------------------------
110
#[cfg(test)]
mod tests {
    use super::*;

    // --------------------------------------------------------------------------------------------
    /// A registered four-letter code resolves to the matching record.
    #[test]
    fn test_good_script_alpha_code() {
        let script = lookup_by_alpha("Hluw").expect("was expecting a script");
        assert_eq!(script.alphabetic_code, "Hluw");
        assert_eq!(script.numeric_code, 80);
        // TODO: also check the alias once its expected value is confirmed. The
        // earlier `script.alias.unwrap()` form cannot move out of a shared
        // reference; borrow instead, e.g.
        // assert_eq!(script.alias.as_deref(), Some("Anatolian_Hieroglyphs"));
    }

    /// A well-formed but unregistered code yields `None`.
    #[test]
    fn test_bad_script_alpha_code() {
        assert!(
            lookup_by_alpha("UTF8").is_none(),
            "was expecting a None in response"
        );
    }

    /// A code of the wrong length violates the documented precondition.
    #[test]
    #[should_panic]
    fn test_wrong_length_script_alpha_code() {
        let _ = lookup_by_alpha("Lat");
    }

    /// A registered numeric code resolves to the matching record.
    #[test]
    fn test_good_script_numeric_code() {
        let script = lookup_by_numeric(&80).expect("was expecting a script");
        assert_eq!(script.alphabetic_code, "Hluw");
        assert_eq!(script.numeric_code, 80);
        // TODO: also check the alias once its expected value is confirmed, e.g.
        // assert_eq!(script.alias.as_deref(), Some("Anatolian_Hieroglyphs"));
    }

    /// An unregistered numeric code yields `None`.
    #[test]
    fn test_bad_script_numeric_code() {
        assert!(
            lookup_by_numeric(&0).is_none(),
            "was expecting a None in response"
        );
    }

    /// Both code listings are populated.
    #[test]
    fn test_script_codes() {
        let codes = all_alpha_codes();
        assert!(!codes.is_empty());
        let numerics = all_numeric_codes();
        assert!(!numerics.is_empty());
    }
}
163}