locale_codes/
language.rs

1/*!
2Codes for the representation of names of languages.
3
4These codes are widely used in many different disciplines, for example for
5bibliographic purposes, in the library community, as well as for computerized
6systems, and the representation of different language versions on websites.
7
8Using a code rather than the name of a language has many benefits as some
9languages are referred to by different groups in different ways, and two
10unrelated languages may share the same or similar name.
11
12* Part 1 (ISO 639-1:2002) provides a 2 letter code that has been designed
13  to represent most of the major languages of the world.
14* Part 2 (ISO 639-2:1998) provides a 3 letter code, which gives more possible
15  combinations, so ISO 639-2:1998 can cover more languages.
16* Part 3 (ISO 639-3:2007) provides a 3 letter code and aims to give as complete
17  a listing of languages as possible, including living, extinct and ancient languages.
18* Part 4 (ISO 639-4:2010) gives the general principles of language coding and
19  lays down guidelines for the use of ISO 639.
20* Part 5 (ISO 639-5:2008) provides a 3 letter code for language families
21  and groups (living and extinct).
22
23## Source - ISO 639
24
25The data used here is taken from
26[SIL International](https://iso639-3.sil.org/code_tables/download_tables).
27
28See also: [Native names for languages](https://www.omniglot.com/language/names.htm).
29*/
30
31use std::collections::HashMap;
32
33use serde::{Deserialize, Serialize};
34
35// ------------------------------------------------------------------------------------------------
36// Public Types
37// ------------------------------------------------------------------------------------------------
38
/// The ISO 639 data identifies 3 classes of languages; each language
/// belongs to exactly one of these classes.
#[derive(Serialize, Deserialize, Debug)]
pub enum LanguageClass {
    /// NOTE(review): presumably a single, individual language — confirm
    /// against the source data.
    Individual,
    /// A macro-language; for these entries `LanguageInfo::family_members`
    /// is documented as listing the member languages.
    MacroLanguage,
    /// NOTE(review): presumably the standard's special-purpose codes —
    /// confirm against the source data.
    Special,
}
47
/// The type of the language; this is more concerned with the language's
/// current usage than with how it is classified (see `LanguageClass`).
///
/// NOTE(review): the variants presumably mirror the language "type" values
/// in the ISO 639-3 code tables — confirm against the source data.
#[derive(Serialize, Deserialize, Debug)]
pub enum LanguageType {
    Ancient,
    Constructed,
    Extinct,
    Historical,
    Living,
    Special,
}
59
/// A representation of registered language data maintained by ISO.
///
/// Instances are deserialized from the JSON data embedded in this module
/// and handed out by reference from `lookup`.
#[derive(Serialize, Deserialize, Debug)]
pub struct LanguageInfo {
    /// The ISO 3-character language identifier
    pub code: String,
    /// The reference name, in English, used by the standard.
    pub reference_name: String,
    /// The indigenous name, if captured in the standard.
    pub indigenous_name: Option<String>,
    /// Common aliases.
    pub other_names: Option<Vec<String>>,
    /// NOTE(review): presumably the ISO 639-2 bibliographic (B) code, when
    /// one exists — confirm against the source data.
    pub bibliographic_code: Option<String>,
    /// NOTE(review): presumably the ISO 639-2 terminology (T) code, when
    /// one exists — confirm against the source data.
    pub terminology_code: Option<String>,
    /// An optional short code for this language. When present it is indexed
    /// so that `lookup` can resolve it to this entry's `code`.
    pub short_code: Option<String>,
    /// The class of this language; see `LanguageClass`.
    pub class: LanguageClass,
    /// The type of this language; see `LanguageType`.
    pub l_type: LanguageType,
    /// if `class` is `LanguageClass::MacroLanguage` this is
    /// a vector of family members of this language.
    pub family_members: Option<Vec<String>>,
}
80
81// ------------------------------------------------------------------------------------------------
82// Public Functions
83// ------------------------------------------------------------------------------------------------
84
lazy_static! {
    // Primary table of language records, keyed by a string code. It is
    // built by `load_languages_from_json` from the embedded JSON data.
    static ref LANGUAGES: HashMap<String, LanguageInfo> = load_languages_from_json();
    // Secondary index mapping each record's `short_code` (when it has one)
    // to that record's `code`. It is derived from `LANGUAGES` by
    // `make_language_lookup`, and `lookup` uses it for 2-character codes.
    static ref LOOKUP: HashMap<String, String> = make_language_lookup();
}
89
90pub fn lookup(code: &str) -> Option<&'static LanguageInfo> {
91    debug!("language::lookup {}", code);
92    assert!(
93        code.len() == 2 || code.len() == 3,
94        "language code must be either 2, or 3, characters long."
95    );
96    match code.len() {
97        3 => match LANGUAGES.get(code) {
98            Some(v) => Some(v),
99            None => None,
100        },
101        2 => match LOOKUP.get(code) {
102            Some(v) => {
103                debug!("language::lookup {} -> {}", code, v);
104                lookup(v)
105            }
106            None => None,
107        },
108        _ => None,
109    }
110}
111
112pub fn all_codes() -> Vec<String> {
113    LANGUAGES.keys().cloned().collect()
114}
115
116// ------------------------------------------------------------------------------------------------
117// Generated Data
118// ------------------------------------------------------------------------------------------------
119
120fn load_languages_from_json() -> HashMap<String, LanguageInfo> {
121    info!("languages_from_json - loading JSON");
122    let raw_data = include_bytes!("data/languages.json");
123    let language_map: HashMap<String, LanguageInfo> = serde_json::from_slice(raw_data).unwrap();
124    info!(
125        "languages_from_json - loaded {} countries",
126        language_map.len()
127    );
128    language_map
129}
130
131fn make_language_lookup() -> HashMap<String, String> {
132    info!("load_language_lookup - create from COUNTRIES");
133    let mut lookup_map: HashMap<String, String> = HashMap::new();
134    for language in LANGUAGES.values() {
135        if let Some(short_code) = &language.short_code {
136            lookup_map.insert(short_code.to_string(), language.code.to_string());
137        }
138    }
139    info!(
140        "load_language_lookup - mapped {} countries",
141        lookup_map.len()
142    );
143    lookup_map
144}
145
146// ------------------------------------------------------------------------------------------------
147// Unit Tests
148// ------------------------------------------------------------------------------------------------
149
#[cfg(test)]
mod tests {
    use super::*;

    use serde_json::ser::to_string_pretty;

    // --------------------------------------------------------------------------------------------
    /// Smoke test: forces the language table to load, looks up one
    /// 3-character code and, if it is present, checks that the record can
    /// be serialized back to JSON.
    #[test]
    fn test_language_loading() {
        match lookup("aab") {
            None => println!("test_language_loading NO 'aab'"),
            Some(l) => {
                // Unwrap the serialization result so the JSON text itself is
                // printed and a serialization failure fails the test.
                let json = to_string_pretty(l).expect("LanguageInfo should serialize to JSON");
                println!("test_language_loading {}", json);
            }
        }
    }
}
164}