#![allow(clippy::module_name_repetitions)]
use std::collections::{HashMap, HashSet};
use std::env;
use std::hash::BuildHasher;
use std::io::Error as IoError;
use std::process::{Command, Stdio};
use anyhow::{anyhow, Error as AnyError};
use encoding::all::ISO_8859_1;
use encoding::{DecoderTrap, Encoding};
use once_cell::sync::Lazy;
use regex::{Error as RegexError, Regex};
use thiserror::Error;
/// The errors that the locale and language detection routines may return.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum UErr {
/// The output of the locale-listing program could not be decoded into text.
#[error("Could not decode the obtained list of locales")]
DecodeLocaleList(#[source] AnyError),
/// The named environment variable holds a value that cannot be
/// represented as a Rust string.
#[error("The {0} environment variable's value is not a valid string")]
InvalidEnvValue(String),
/// An invariant of this module was violated; the payload describes it.
#[error("Internal utf8-locale error: {0}")]
Internal(String),
/// The named regular expression failed to compile.
#[error("Internal error: could not compile the {0} regular expression")]
Regex(String, #[source] RegexError),
/// A regex match succeeded, but the named capture group (first field)
/// was not present in the captures (second field, debug-formatted).
#[error("Internal error: could not extract the '{0}' regex group out of {1}")]
RegexCaptures(String, String),
/// The named external program could not be started or waited for.
#[error("Could not run the `{0}` program")]
RunProgram(String, #[source] IoError),
}
/// The environment variables examined by default when looking for
/// the preferred languages.
///
/// The order matters: `get_preferred_languages` walks the names in the
/// order given, so languages found in earlier variables come first in
/// its result.
pub const LOCALE_VARIABLES: [&str; 14] = [
"LC_ALL",
"LANG",
"LC_MESSAGES",
"LC_COLLATE",
"LC_NAME",
"LC_IDENTIFICATION",
"LC_CTYPE",
"LC_NUMERIC",
"LC_TIME",
"LC_MONETARY",
"LC_PAPER",
"LC_ADDRESS",
"LC_TELEPHONE",
"LC_MEASUREMENT",
];
/// The codeset spellings (the part of a locale name after the dot) that
/// are treated as denoting UTF-8. The comparison is exact and case-sensitive.
pub const UTF8_ENCODINGS: [&str; 2] = ["UTF-8", "utf8"];
/// The languages that `Utf8Detect::detect` looks for, in order of
/// preference, when the caller has not specified any.
pub const UTF8_LANGUAGES: [&str; 5] = ["C", "en", "de", "es", "it"];
/// A verbose-mode (`(?x)`) regular expression that breaks a locale name of
/// the form `lang[_territory][.codeset][@modifier]` into named groups.
///
/// Only the `lang` group is mandatory; `territory`, `codeset`, and
/// `modifier` are each optional, so code using the captures must be
/// prepared for those groups to be absent.
pub const RE_LOCALE_NAME: &str = r"(?x) ^
(?P<lang> [a-zA-Z0-9]+ )
(?:
_
(?P<territory> [a-zA-Z0-9]+ )
)?
(?:
\.
(?P<codeset> [a-zA-Z0-9-]+ )
)?
(?:
@
(?P<modifier> [a-zA-Z0-9]+ )
)?
$ ";
/// Assign a preference weight to each distinct language name.
///
/// The first distinct name receives weight 0, the next one 1, and so on;
/// a repeated name keeps the weight of its first occurrence. The second
/// element of the returned tuple is the number of distinct names, i.e.
/// a weight that is strictly worse than every weight in the map.
fn build_weights(langs: &[&str]) -> (HashMap<String, usize>, usize) {
    let weights = langs
        .iter()
        .fold(HashMap::new(), |mut acc: HashMap<String, usize>, name| {
            // The next free weight is always the number of names seen so far.
            if !acc.contains_key(*name) {
                let next = acc.len();
                acc.insert((*name).to_owned(), next);
            }
            acc
        });
    let unweight = weights.len();
    (weights, unweight)
}
/// Return the lazily compiled locale-name regular expression.
///
/// The pattern is compiled at most once; the outcome (success or failure)
/// is cached for the lifetime of the program.
///
/// # Errors
///
/// [`UErr::Regex`] if [`RE_LOCALE_NAME`] could not be compiled.
fn get_re_name() -> Result<&'static Regex, UErr> {
    static RE_NAME: Lazy<Result<Regex, RegexError>> = Lazy::new(|| Regex::new(RE_LOCALE_NAME));
    match &*RE_NAME {
        Ok(compiled) => Ok(compiled),
        // The cached error is only borrowed, so hand out a copy of it.
        Err(err) => Err(UErr::Regex("locale name".to_owned(), err.clone())),
    }
}
/// Pick the best available UTF-8-capable locale for the given languages.
///
/// Runs `locale -a` (its standard error stream is passed through to ours),
/// decodes the output as ISO-8859-1, and examines each line as a locale
/// name. Among the names whose codeset is one of [`UTF8_ENCODINGS`] and
/// whose language code appears in `languages`, the one whose language comes
/// earliest in `languages` wins; on a tie the first line seen is kept.
/// If no line qualifies, the plain `"C"` locale is returned.
///
/// The exit status of the `locale` program is not examined.
///
/// # Errors
///
/// - [`UErr::Regex`] if the locale-name pattern cannot be compiled.
/// - [`UErr::RunProgram`] if `locale -a` cannot be executed.
/// - [`UErr::DecodeLocaleList`] if its output cannot be decoded.
/// - [`UErr::RegexCaptures`] if a matched line unexpectedly lacks
///   the `lang` group.
#[inline]
pub fn detect_utf8_locale(languages: &[&str]) -> Result<String, UErr> {
    let re_name = get_re_name()?;
    let (weights, unweight) = build_weights(languages);

    let output = Command::new("locale")
        .arg("-a")
        .stderr(Stdio::inherit())
        .output()
        .map_err(|err| UErr::RunProgram("locale -a".to_owned(), err))?;
    let text = ISO_8859_1
        .decode(&output.stdout, DecoderTrap::Strict)
        .map_err(|err| UErr::DecodeLocaleList(anyhow!("Could not decode a string: {}", err)))?;

    // Start from the fallback locale with a weight worse than any real one.
    let mut best_name = "C".to_owned();
    let mut best_weight = unweight;

    for line in text.lines() {
        let caps = match re_name.captures(line) {
            Some(caps) => caps,
            None => continue,
        };

        // Lines with no codeset, or a non-UTF-8 one, are of no interest.
        let is_utf8 = caps
            .name("codeset")
            .map_or(false, |codeset| UTF8_ENCODINGS.contains(&codeset.as_str()));
        if !is_utf8 {
            continue;
        }

        let lang = caps
            .name("lang")
            .ok_or_else(|| UErr::RegexCaptures("lang".to_owned(), format!("{:?}", caps)))?
            .as_str();

        // Strictly better only: the first locale seen for a language is kept.
        if let Some(&weight) = weights.get(lang) {
            if weight < best_weight {
                best_name = line.to_owned();
                best_weight = weight;
            }
        }
    }

    Ok(best_name)
}
/// Build the set of environment variables that select a UTF-8 locale.
///
/// The result holds exactly two entries: `LC_ALL`, set to the locale chosen
/// by [`detect_utf8_locale`] for `languages`, and `LANGUAGE`, set to
/// an empty string.
///
/// # Errors
///
/// Propagates any error from [`detect_utf8_locale`].
#[inline]
pub fn get_utf8_vars(languages: &[&str]) -> Result<HashMap<String, String>, UErr> {
    detect_utf8_locale(languages).map(|locale| {
        let mut vars = HashMap::new();
        vars.insert("LC_ALL".to_owned(), locale);
        vars.insert("LANGUAGE".to_owned(), String::new());
        vars
    })
}
/// Return a copy of the current process environment with the variables
/// from [`get_utf8_vars`] layered on top.
///
/// # Errors
///
/// Propagates any error from [`get_utf8_vars`].
///
/// # Panics
///
/// `std::env::vars` is documented to panic while iterating if any variable
/// name or value in the process environment is not valid Unicode.
#[inline]
pub fn get_utf8_env(languages: &[&str]) -> Result<HashMap<String, String>, UErr> {
    // Snapshot the environment before running the detection.
    let current = env::vars();
    let overrides = get_utf8_vars(languages)?;

    let mut merged: HashMap<String, String> = current.collect();
    // Later insertions win, so the UTF-8 settings replace any existing ones.
    merged.extend(overrides);
    Ok(merged)
}
/// Determine the preferred languages from locale-related variables.
///
/// Looks up each of `names`, in order, in `env`. A value counts only if it
/// parses as a locale name and specifies one of the [`UTF8_ENCODINGS`] as
/// its codeset; its language code is then appended to the result unless
/// already present. Variables that are unset, do not look like a locale
/// name, have no codeset at all (e.g. `C`, `POSIX`, `en_US`), or name a
/// non-UTF-8 codeset are skipped. `"C"` is always appended last if it has
/// not been seen, so the result is never empty.
///
/// # Errors
///
/// - [`UErr::Regex`] if the locale-name pattern cannot be compiled.
/// - [`UErr::RegexCaptures`] if a matched value unexpectedly lacks
///   the `lang` group.
#[inline]
pub fn get_preferred_languages<S: BuildHasher>(
    env: &HashMap<String, String, S>,
    names: &[&str],
) -> Result<Vec<String>, UErr> {
    let re_name = get_re_name()?;
    let mut res: Vec<String> = Vec::new();

    for name in names {
        let caps = match env.get(*name).and_then(|value| re_name.captures(value)) {
            Some(caps) => caps,
            None => continue,
        };

        // The codeset group is optional in the pattern: a perfectly valid
        // value such as "C" or "en_US" has none. That is not an error, it
        // simply is not a UTF-8 locale, so skip it.
        let is_utf8 = caps
            .name("codeset")
            .map_or(false, |codeset| UTF8_ENCODINGS.contains(&codeset.as_str()));
        if !is_utf8 {
            continue;
        }

        // The lang group is mandatory in the pattern, so its absence after
        // a successful match really would be an internal error.
        let lang = caps
            .name("lang")
            .ok_or_else(|| UErr::RegexCaptures("lang".to_owned(), format!("{:?}", caps)))?
            .as_str();
        if !res.iter().any(|seen| seen.as_str() == lang) {
            res.push(lang.to_owned());
        }
    }

    if !res.iter().any(|seen| seen.as_str() == "C") {
        res.push("C".to_owned());
    }
    Ok(res)
}
/// A builder for determining the preferred languages; configure it with
/// the `with_*` methods, then call `detect`.
#[derive(Debug, Default)]
#[non_exhaustive]
pub struct LanguagesDetect<'names> {
/// The environment to examine; if `None`, `detect` reads the current
/// process environment instead.
pub env: Option<HashMap<String, String>>,
/// The names of the variables to examine, in order of preference;
/// if `None`, `detect` uses `LOCALE_VARIABLES`.
pub names: Option<&'names [&'names str]>,
}
impl<'names> LanguagesDetect<'names> {
    /// Create a detector with no overrides: it will examine the current
    /// process environment and the [`LOCALE_VARIABLES`] names.
    #[inline]
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Determine the preferred languages, see [`get_preferred_languages`].
    ///
    /// If no environment was supplied via [`Self::with_env`], the values of
    /// the variables to be queried (the names given to [`Self::with_names`],
    /// or [`LOCALE_VARIABLES`] by default) are read from the current process
    /// environment. Other variables are not examined at all.
    ///
    /// # Errors
    ///
    /// - [`UErr::InvalidEnvValue`] if one of the queried variables is set in
    ///   the process environment to a value that is not valid Unicode.
    /// - Any error from [`get_preferred_languages`].
    #[inline]
    pub fn detect(self) -> Result<Vec<String>, UErr> {
        let qnames = self.names.unwrap_or(&LOCALE_VARIABLES);
        let qenv = match self.env {
            Some(env) => env,
            None => {
                // Collect exactly the variables that will be looked up, so
                // that caller-supplied names are honored and unrelated
                // variables cannot cause a failure.
                let wanted: HashSet<&str> = qnames.iter().copied().collect();
                env::vars_os()
                    .filter_map(|(os_name, os_value)| {
                        // A name that is not valid Unicode cannot be one
                        // of the (string) names we are interested in.
                        let name = os_name.to_str()?;
                        if !wanted.contains(name) {
                            return None;
                        }
                        Some(match os_value.to_str() {
                            Some(value) => Ok((name.to_owned(), value.to_owned())),
                            None => Err(UErr::InvalidEnvValue(name.to_owned())),
                        })
                    })
                    .collect::<Result<HashMap<String, String>, UErr>>()?
            }
        };
        get_preferred_languages(&qenv, qnames)
    }

    /// Examine the supplied environment instead of the process one.
    #[allow(clippy::missing_const_for_fn)]
    #[inline]
    #[must_use]
    pub fn with_env(self, env: HashMap<String, String>) -> Self {
        Self {
            env: Some(env),
            ..self
        }
    }

    /// Examine the supplied variable names, in the order given, instead of
    /// [`LOCALE_VARIABLES`].
    #[allow(clippy::missing_const_for_fn)]
    #[inline]
    #[must_use]
    pub fn with_names(self, names: &'names [&'names str]) -> Self {
        Self {
            names: Some(names),
            ..self
        }
    }
}
/// The outcome of a successful `Utf8Detect::detect` call.
#[derive(Debug)]
#[non_exhaustive]
pub struct Utf8Environment {
/// The base environment with the entries of `env_vars` layered on top.
pub env: HashMap<String, String>,
/// Only the variables that need to be set to select the UTF-8 locale.
pub env_vars: HashMap<String, String>,
/// The name of the chosen locale (the `LC_ALL` value from `env_vars`).
pub locale: String,
}
/// A builder for detecting a UTF-8-capable locale; configure it with
/// the `with_*` methods, then call `detect`.
#[derive(Debug, Default)]
#[non_exhaustive]
pub struct Utf8Detect {
/// The base environment to extend; if `None`, `detect` uses the current
/// process environment.
pub env: Option<HashMap<String, String>>,
/// The languages to look for, in order of preference; if `None`,
/// `detect` uses `UTF8_LANGUAGES`.
pub languages: Option<Vec<String>>,
}
impl Utf8Detect {
    /// Create a detector with no overrides: it will look for the
    /// [`UTF8_LANGUAGES`] and extend the current process environment.
    #[inline]
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Detect a UTF-8-capable locale and prepare an environment using it.
    ///
    /// The variables to set are obtained from [`get_utf8_vars`]; they are
    /// then layered on top of the base environment (the one supplied via
    /// [`Self::with_env`], or else the current process environment).
    ///
    /// # Errors
    ///
    /// - Any error from [`get_utf8_vars`].
    /// - [`UErr::Internal`] if the detected variables lack `LC_ALL`.
    ///
    /// # Panics
    ///
    /// When no base environment was supplied, the process environment is
    /// read with `std::env::vars`, which is documented to panic if any
    /// variable name or value is not valid Unicode.
    #[inline]
    pub fn detect(self) -> Result<Utf8Environment, UErr> {
        let env_vars = if let Some(langs) = self.languages {
            let borrowed: Vec<&str> = langs.iter().map(String::as_str).collect();
            get_utf8_vars(&borrowed)?
        } else {
            get_utf8_vars(&UTF8_LANGUAGES)?
        };

        let mut merged: HashMap<String, String> = match self.env {
            Some(given) => given,
            None => env::vars().collect(),
        };

        let locale = match env_vars.get("LC_ALL") {
            Some(value) => value.clone(),
            None => {
                return Err(UErr::Internal(format!(
                    "Internal error: no 'LC_ALL' after successful detection: {:?}",
                    env_vars
                )))
            }
        };

        // The detected settings take precedence over the base environment.
        merged.extend(
            env_vars
                .iter()
                .map(|(name, value)| (name.clone(), value.clone())),
        );

        Ok(Utf8Environment {
            env: merged,
            env_vars,
            locale,
        })
    }

    /// Extend the supplied environment instead of the process one.
    #[allow(clippy::missing_const_for_fn)]
    #[inline]
    #[must_use]
    pub fn with_env(self, env: HashMap<String, String>) -> Self {
        Self {
            env: Some(env),
            ..self
        }
    }

    /// Look for the supplied languages, in order of preference, instead of
    /// [`UTF8_LANGUAGES`].
    #[inline]
    #[must_use]
    pub fn with_languages(self, langs: Vec<String>) -> Self {
        Self {
            languages: Some(langs),
            ..self
        }
    }
}