locale_codes/language.rs
/*!
Codes for the representation of names of languages.

These codes are widely used in many different disciplines, for example for
bibliographic purposes, in the library community, as well as for computerized
systems, and the representation of different language versions on websites.

Using a code rather than the name of a language has many benefits, as some
languages are referred to by different groups in different ways, and two
unrelated languages may share the same or a similar name.

* Part 1 (ISO 639-1:2002) provides a 2-letter code that has been designed
  to represent most of the major languages of the world.
* Part 2 (ISO 639-2:1998) provides a 3-letter code, which gives more possible
  combinations, so ISO 639-2:1998 can cover more languages.
* Part 3 (ISO 639-3:2007) provides a 3-letter code and aims to give as complete
  a listing of languages as possible, including living, extinct and ancient languages.
* Part 4 (ISO 639-4:2010) gives the general principles of language coding and
  lays down guidelines for the use of ISO 639.
* Part 5 (ISO 639-5:2008) provides a 3-letter code for language families
  and groups (living and extinct).

## Source - ISO 639

The data used here is taken from
[SIL International](https://iso639-3.sil.org/code_tables/download_tables).

See also: [Native names for languages](https://www.omniglot.com/language/names.htm).
*/
30
31use std::collections::HashMap;
32
33use serde::{Deserialize, Serialize};
34
35// ------------------------------------------------------------------------------------------------
36// Public Types
37// ------------------------------------------------------------------------------------------------
38
39/// The ISO 639 data identifies 3 classes of languages, each language is
40/// one of these classes only.
41#[derive(Serialize, Deserialize, Debug)]
42pub enum LanguageClass {
43 Individual,
44 MacroLanguage,
45 Special,
46}
47
48/// The type of the language in this this meaning is more concerning it's
49/// current usage.
50#[derive(Serialize, Deserialize, Debug)]
51pub enum LanguageType {
52 Ancient,
53 Constructed,
54 Extinct,
55 Historical,
56 Living,
57 Special,
58}
59
60/// A representation of registered language data maintained by ISO.
61#[derive(Serialize, Deserialize, Debug)]
62pub struct LanguageInfo {
63 /// The ISO 3-character language identifier
64 pub code: String,
65 /// The reference name, in English, used by the standard.
66 pub reference_name: String,
67 /// The indigenous name, if captured in the standard.
68 pub indigenous_name: Option<String>,
69 /// Common aliases.
70 pub other_names: Option<Vec<String>>,
71 pub bibliographic_code: Option<String>,
72 pub terminology_code: Option<String>,
73 pub short_code: Option<String>,
74 pub class: LanguageClass,
75 pub l_type: LanguageType,
76 /// if `class` is `LanguageClass::MacroLanguage` this is
77 /// a vector of family members of this language.
78 pub family_members: Option<Vec<String>>,
79}
80
81// ------------------------------------------------------------------------------------------------
82// Public Functions
83// ------------------------------------------------------------------------------------------------
84
85lazy_static! {
86 static ref LANGUAGES: HashMap<String, LanguageInfo> = load_languages_from_json();
87 static ref LOOKUP: HashMap<String, String> = make_language_lookup();
88}
89
90pub fn lookup(code: &str) -> Option<&'static LanguageInfo> {
91 debug!("language::lookup {}", code);
92 assert!(
93 code.len() == 2 || code.len() == 3,
94 "language code must be either 2, or 3, characters long."
95 );
96 match code.len() {
97 3 => match LANGUAGES.get(code) {
98 Some(v) => Some(v),
99 None => None,
100 },
101 2 => match LOOKUP.get(code) {
102 Some(v) => {
103 debug!("language::lookup {} -> {}", code, v);
104 lookup(v)
105 }
106 None => None,
107 },
108 _ => None,
109 }
110}
111
112pub fn all_codes() -> Vec<String> {
113 LANGUAGES.keys().cloned().collect()
114}
115
116// ------------------------------------------------------------------------------------------------
117// Generated Data
118// ------------------------------------------------------------------------------------------------
119
120fn load_languages_from_json() -> HashMap<String, LanguageInfo> {
121 info!("languages_from_json - loading JSON");
122 let raw_data = include_bytes!("data/languages.json");
123 let language_map: HashMap<String, LanguageInfo> = serde_json::from_slice(raw_data).unwrap();
124 info!(
125 "languages_from_json - loaded {} countries",
126 language_map.len()
127 );
128 language_map
129}
130
131fn make_language_lookup() -> HashMap<String, String> {
132 info!("load_language_lookup - create from COUNTRIES");
133 let mut lookup_map: HashMap<String, String> = HashMap::new();
134 for language in LANGUAGES.values() {
135 if let Some(short_code) = &language.short_code {
136 lookup_map.insert(short_code.to_string(), language.code.to_string());
137 }
138 }
139 info!(
140 "load_language_lookup - mapped {} countries",
141 lookup_map.len()
142 );
143 lookup_map
144}
145
146// ------------------------------------------------------------------------------------------------
147// Unit Tests
148// ------------------------------------------------------------------------------------------------
149
#[cfg(test)]
mod tests {
    use super::*;

    use serde_json::ser::to_string_pretty;

    // --------------------------------------------------------------------------------------------
    /// The bundled data must load, contain the 3-character code `aab`, and
    /// the resulting record must serialize back to JSON.
    #[test]
    fn test_language_loading() {
        let language =
            lookup("aab").expect("language code 'aab' should be present in the bundled data");
        assert_eq!(language.code, "aab");

        let json = to_string_pretty(language).expect("LanguageInfo should serialize to JSON");
        println!("test_language_loading {}", json);
    }
}