1use std::{collections::HashMap, fmt, result, str::from_utf8};
4
5use anyhow::anyhow;
6use lazy_static::lazy_static;
7use log::debug;
8use serde::{Serialize, Serializer};
9use whatlang;
10
11use crate::Result;
12
/// Text of the language-code table `data/language-codes-full.csv`, embedded
/// into the binary at compile time.
///
/// `iso_689_canonical_codes_and_names` parses it as CSV and reads five columns
/// from every record.
static ISO_639_CODES: &str = include_str!("data/language-codes-full.csv");
22
/// Lookup tables built from the embedded language-code CSV.
struct LangMaps {
    /// Maps the codes found in the first two CSV columns (the longer,
    /// presumably three-letter, codes) to the short code in the third column
    /// of the same row. Rows whose short code is `"null"` add nothing here.
    canonical_codes: HashMap<String, String>,
    /// Maps a code to the contents of the row's English-name column. Keyed by
    /// the short code when the row has one, otherwise by each of the row's
    /// non-`"null"` long codes.
    names: HashMap<String, String>,
}
28
29fn iso_689_canonical_codes_and_names() -> LangMaps {
31 let mut canonical_codes = HashMap::new();
32 let mut names = HashMap::new();
33
34 let mut rdr = csv::Reader::from_reader(ISO_639_CODES.as_bytes());
36 let mut r = csv::StringRecord::new();
37 while rdr.read_record(&mut r).expect("error reading embedded CSV") {
38 let (a3b, a3t, a2, en, _fr) = (&r[0], &r[1], &r[2], &r[3], &r[4]);
39 if a2 != "null" {
40 if a3b != "null" {
41 canonical_codes.insert(a3b.to_owned(), a2.to_owned());
42 }
43 if a3t != "null" {
44 canonical_codes.insert(a3t.to_owned(), a2.to_owned());
45 }
46 names.insert(a2.to_owned(), en.to_owned());
47 } else {
48 if a3b != "null" {
49 names.insert(a3b.to_owned(), en.to_owned());
50 }
51 if a3t != "null" {
52 names.insert(a3t.to_owned(), en.to_owned());
53 }
54 }
55 }
56 LangMaps {
57 canonical_codes,
58 names,
59 }
60}
61
lazy_static! {
    /// Language lookup tables, built from the embedded CSV by
    /// `iso_689_canonical_codes_and_names` the first time they are accessed.
    static ref LANG_MAPS: LangMaps = iso_689_canonical_codes_and_names();
}
67
/// A language identifier stored as a two- or three-byte ASCII code.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct Lang {
    /// The code bytes. `Lang::iso639` pads two-byte codes with a trailing
    /// space (`b' '`) so that every value occupies exactly three bytes.
    code: [u8; 3],
}
73
74impl Lang {
75 pub fn iso639(code: &str) -> Result<Lang> {
87 let canon = LANG_MAPS
88 .canonical_codes
89 .get(code)
90 .cloned()
91 .unwrap_or_else(|| code.to_owned());
92 let c = canon.as_bytes();
93 match (canon.is_ascii(), c.len()) {
94 (true, 2) => Ok(Lang {
95 code: [c[0], c[1], b' '],
96 }),
97 (true, 3) => Ok(Lang {
98 code: [c[0], c[1], c[2]],
99 }),
100 _ => Err(anyhow!("Unsupported language code: {}", code)),
101 }
102 }
103
104 pub fn as_str(&self) -> &str {
113 if self.code[2] == b' ' {
115 from_utf8(&self.code[..2]).unwrap()
116 } else {
117 from_utf8(&self.code).unwrap()
118 }
119 }
120
121 pub fn for_text(text: &str) -> Option<Lang> {
130 if let Some(info) = whatlang::detect(text) {
131 debug!("detected language: {:?}", info);
132 if info.is_reliable() {
133 return Lang::iso639(info.lang().code()).ok();
134 }
135 }
136 None
137 }
138
139 pub fn english_names(&self) -> Result<Vec<&'static str>> {
150 let name_str = LANG_MAPS
151 .names
152 .get(self.as_str())
153 .map(|s| s.as_str())
154 .ok_or_else(|| {
155 anyhow!("No English name for language code: {:?}", self.as_str())
156 })?;
157 Ok(name_str.split("; ").collect())
158 }
159}
160
161impl fmt::Debug for Lang {
162 fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> {
163 write!(f, "{}", self.as_str())
164 }
165}
166
167impl fmt::Display for Lang {
168 fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> {
169 write!(f, "{}", self.as_str())
170 }
171}
172
173impl Serialize for Lang {
174 fn serialize<S>(&self, serializer: S) -> result::Result<S::Ok, S::Error>
175 where
176 S: Serializer,
177 {
178 self.as_str().serialize(serializer)
179 }
180}