#![warn(clippy::all)]
#![warn(missing_docs)]
#![warn(missing_doc_code_examples)]
#[cfg(feature = "enum")]
use {std::string::ToString, strum_macros};
/// Languages that can be requested when the `enum` feature is enabled.
///
/// A variant is turned into a lookup key by `parse`, which lowercases the
/// variant's `to_string()` output; the keys quoted on the variants below assume
/// the derived `ToString` renders the bare variant name. The variants appear in
/// the same order as the entries of [`LANGUAGES`].
///
/// The enum is `#[non_exhaustive]`, so downstream `match`es need a wildcard arm.
#[cfg(feature = "enum")]
#[non_exhaustive]
#[derive(strum_macros::ToString, Debug, Copy, Clone, PartialEq, strum_macros::EnumString)]
pub enum LANGUAGE {
/// Arabic (lookup key `"arabic"`).
Arabic,
/// Azerbaijani (lookup key `"azerbaijani"`).
Azerbaijani,
/// Catalan (lookup key `"catalan"`).
Catalan,
/// Danish (lookup key `"danish"`).
Danish,
/// English (lookup key `"english"`).
English,
/// French (lookup key `"french"`).
French,
/// Hindi (lookup key `"hindi"`).
Hindi,
/// Indonesian (lookup key `"indonesian"`).
Indonesian,
/// Norwegian (lookup key `"norwegian"`).
Norwegian,
/// Portuguese (lookup key `"portuguese"`).
Portuguese,
/// Russian (lookup key `"russian"`).
Russian,
/// Spanish (lookup key `"spanish"`).
Spanish,
/// Turkish (lookup key `"turkish"`).
Turkish,
/// Vietnamese (lookup key `"vietnamese"`).
Vietnamese,
/// Bulgarian (lookup key `"bulgarian"`).
Bulgarian,
/// Czech (lookup key `"czech"`).
Czech,
/// Dutch (lookup key `"dutch"`).
Dutch,
/// Finnish (lookup key `"finnish"`).
Finnish,
/// German (lookup key `"german"`).
German,
/// Hungarian (lookup key `"hungarian"`).
Hungarian,
/// Italian (lookup key `"italian"`).
Italian,
/// Polish (lookup key `"polish"`).
Polish,
/// Romanian (lookup key `"romanian"`).
Romanian,
/// Slovak (lookup key `"slovak"`).
Slovak,
/// Swedish (lookup key `"swedish"`).
Swedish,
/// Ukrainian (lookup key `"ukrainian"`).
Ukrainian,
/// Hebrew (lookup key `"hebrew"`).
Hebrew,
/// Greek (lookup key `"greek"`).
Greek,
/// Kazakh (lookup key `"kazakh"`).
Kazakh,
/// Nepali (lookup key `"nepali"`).
Nepali,
/// Slovenian (lookup key `"slovenian"`).
Slovenian,
/// Tajik (lookup key `"tajik"`).
Tajik,
}
/// Lowercase English names of the 32 supported languages.
///
/// The order of this table is significant: `get_language_from_iso` finds the
/// position of a code inside one of the code tables
/// ([`LANGUAGES_ISO_693_1`], [`LANGUAGES_ISO_693_2T`]) and returns the name
/// stored at the same position here. Any new entry must therefore be added at
/// the same index in all three tables.
pub const LANGUAGES: [&str; 32] = [
"arabic",
"azerbaijani",
"catalan",
"danish",
"english",
"french",
"hindi",
"indonesian",
"norwegian",
"portuguese",
"russian",
"spanish",
"turkish",
"vietnamese",
"bulgarian",
"czech",
"dutch",
"finnish",
"german",
"hungarian",
"italian",
"polish",
"romanian",
"slovak",
"swedish",
"ukrainian",
"hebrew",
"greek",
"kazakh",
"nepali",
"slovenian",
"tajik",
];
/// Two-letter language codes, index-aligned with [`LANGUAGES`].
///
/// `get_language_from_code` consults this table for every input whose byte
/// length is exactly 2; the code at index `i` resolves to `LANGUAGES[i]`.
///
/// NOTE(review): the identifier says "693", but the language-code standard is
/// ISO 639; the name is public API and is kept as-is.
/// NOTE(review): `"in"` (Indonesian) and `"nn"` (Norwegian) look like the
/// withdrawn / Nynorsk-specific codes rather than `"id"` / `"no"` — confirm
/// before relying on them; the fixed array length prevents adding aliases here.
pub const LANGUAGES_ISO_693_1: [&str; 32] = [
"ar", "az", "ca", "da", "en", "fr", "hi", "in", "nn", "pt", "ru", "es", "tr", "vi", "bg", "cs",
"nl", "fi", "de", "hu", "it", "pl", "ro", "sk", "sv", "uk", "he", "el", "kk", "ne", "sl", "tg",
];
/// Three-letter language codes, index-aligned with [`LANGUAGES`].
///
/// `get_language_from_code` consults this table for every input whose byte
/// length is exactly 3; the code at index `i` resolves to `LANGUAGES[i]`.
///
/// NOTE(review): the identifier says "693", but the language-code standard is
/// ISO 639 (these appear to be the 639-2/T "terminologic" codes); the name is
/// public API and is kept as-is.
/// NOTE(review): `"nno"` is used for Norwegian, which looks like the
/// Nynorsk-specific code — confirm this is intended.
pub const LANGUAGES_ISO_693_2T: [&str; 32] = [
"ara", "aze", "cat", "dan", "eng", "fra", "hin", "ind", "nno", "por", "rus", "spa", "tur",
"vie", "bul", "ces", "nld", "fin", "deu", "hun", "ita", "pol", "ron", "slk", "swe", "ukr",
"heb", "ell", "kaz", "nep", "slv", "tgk",
];
/// Expands to a `match` that maps a language name to its embedded word list.
///
/// Invocation shape:
///
/// ```text
/// string_match!(language_expr, "dir_a", ["lang1", "lang2"], "dir_b", ["lang3"])
/// ```
///
/// For every `"dir", ["lang", ...]` group one arm per language is generated;
/// the arm embeds the file `dir/lang` with `include_bytes!` (path relative to
/// the invoking source file) and hands the bytes to `read_from_bytes`, which
/// must be in scope at the call site.
///
/// `language_expr` must evaluate to a `&str`. It is evaluated exactly once,
/// even when the fallback arm fires.
///
/// # Panics
///
/// The expansion panics when the language matches none of the listed names.
macro_rules! string_match {
    (
        $language:expr,
        $(
            $directory:literal, [$( $lang:literal ),*]
        ),*
    ) => {{
        // Bind the expression once so the panic path reuses the value
        // instead of re-running the (possibly side-effecting) expression.
        let language = $language;
        match language {
            $(
                $(
                    $lang => read_from_bytes(include_bytes!(concat!($directory, "/", $lang))),
                )*
            )*
            _ => panic!("Unfortunately, the '{}' language is not currently supported. Please make sure that the name of the language is spelled in English.", language),
        }
    }};
}
/// Returns the stop-word list for `input_language`.
///
/// The argument is first resolved to a lowercase English language name by
/// `parse` (a `LANGUAGE` variant with the `enum` feature; otherwise a string
/// that is either the name itself or a 2-/3-letter code). The name then selects
/// a word list that was embedded at compile time: the file `savand/<name>` for
/// the first group of languages below, `nltk/<name>` for the second. The file's
/// contents are split into strings by `read_from_bytes`.
///
/// # Panics
///
/// Panics when the resolved name is not one of the languages listed in the
/// body, or (without the `enum` feature) when a 2-/3-letter input is not a
/// known code.
pub fn get(
    #[cfg(feature = "enum")] input_language: LANGUAGE,
    #[cfg(not(feature = "enum"))] input_language: &'static str,
) -> Vec<String> {
    string_match!(
        parse(input_language),
        // Lists shipped in the `savand` directory.
        "savand",
        [
            "arabic",
            "bulgarian",
            "catalan",
            "czech",
            "danish",
            "dutch",
            "english",
            "finnish",
            "french",
            "german",
            "hebrew",
            "hindi",
            "hungarian",
            "indonesian",
            "italian",
            "norwegian",
            "polish",
            "portuguese",
            "romanian",
            "russian",
            "slovak",
            "spanish",
            "swedish",
            "turkish",
            "ukrainian",
            "vietnamese"
        ],
        // Languages served from the `nltk` directory instead.
        "nltk",
        [
            "azerbaijani",
            "greek",
            "kazakh",
            "nepali",
            "slovenian",
            "tajik"
        ]
    )
}
/// Returns the stop-word list for `input_language`, always taken from the
/// `nltk` directory.
///
/// The argument is resolved to a lowercase English language name by `parse`
/// (a `LANGUAGE` variant with the `enum` feature; otherwise a string that is
/// either the name itself or a 2-/3-letter code). The embedded file
/// `nltk/<name>` is then split into strings by `read_from_bytes`.
///
/// Only the 23 languages listed in the body are available through this
/// function.
///
/// # Panics
///
/// Panics when the resolved name is not one of the languages listed in the
/// body, or (without the `enum` feature) when a 2-/3-letter input is not a
/// known code.
pub fn get_nltk(
    #[cfg(feature = "enum")] input_language: LANGUAGE,
    #[cfg(not(feature = "enum"))] input_language: &'static str,
) -> Vec<String> {
    string_match!(
        parse(input_language),
        "nltk",
        [
            "arabic",
            "azerbaijani",
            "danish",
            "dutch",
            "english",
            "finnish",
            "french",
            "german",
            "greek",
            "hungarian",
            "indonesian",
            "italian",
            "kazakh",
            "nepali",
            "norwegian",
            "portuguese",
            "romanian",
            "russian",
            "slovenian",
            "spanish",
            "swedish",
            "tajik",
            "turkish"
        ]
    )
}
/// Resolves the caller-supplied language to the lowercase English name that
/// `string_match!` matches on.
///
/// * With the `enum` feature the variant's `to_string()` output is lowercased
///   and looked up in [`LANGUAGES`], so the returned `&'static str` borrows
///   from that constant table and nothing is allocated permanently.
/// * Without the feature the string is forwarded to `get_language_from_code`,
///   which expands 2-/3-letter codes and returns anything else unchanged.
fn parse(
    #[cfg(feature = "enum")] input_language: LANGUAGE,
    #[cfg(not(feature = "enum"))] input_language: &'static str,
) -> &'static str {
    #[cfg(feature = "enum")]
    {
        let lowered = input_language.to_string().to_lowercase();
        let known = LANGUAGES.iter().copied().find(|&name| name == lowered);
        match known {
            Some(name) => name,
            // Only reachable for a variant that is missing from `LANGUAGES`.
            // Leaking keeps the previous contract (a `'static` name that the
            // caller's "not supported" panic can report) for that rare case,
            // instead of leaking a fresh string on every single call.
            None => Box::leak(lowered.into_boxed_str()),
        }
    }
    #[cfg(not(feature = "enum"))]
    {
        get_language_from_code(input_language)
    }
}
/// Expands a language code to a language name, dispatching on the input's
/// length in bytes.
///
/// * 2 bytes: looked up in `LANGUAGES_ISO_693_1`.
/// * 3 bytes: looked up in `LANGUAGES_ISO_693_2T`.
/// * any other length: assumed to already be a language name and returned
///   unchanged.
///
/// # Panics
///
/// Propagates the panic raised by `get_language_from_iso` when a 2- or 3-byte
/// input is not present in the corresponding table.
#[cfg(not(feature = "enum"))]
fn get_language_from_code(code: &str) -> &str {
    match code.len() {
        2 => get_language_from_iso(code, LANGUAGES_ISO_693_1),
        3 => get_language_from_iso(code, LANGUAGES_ISO_693_2T),
        _ => code,
    }
}
/// Translates a language code into the lowercase English language name.
///
/// `library` is one of the code tables; the position at which `code` occurs in
/// it is used as the index into [`LANGUAGES`], so both tables must be
/// index-aligned. The comparison is exact (case-sensitive).
///
/// # Panics
///
/// Panics when `code` does not occur in `library`.
#[cfg(not(feature = "enum"))]
fn get_language_from_iso<'a>(code: &'a str, library: [&str; 32]) -> &'a str {
    match library.iter().position(|&candidate| candidate == code) {
        Some(index) => LANGUAGES[index],
        None => panic!("It looks like you're trying to use an ISO 639 language code. Unfortunately, the {} language code is not currently supported.", code),
    }
}
/// Decodes `bytes` as UTF-8 and returns every `'\n'`-separated piece as an
/// owned `String`.
///
/// Invalid UTF-8 sequences are replaced with U+FFFD rather than rejected.
/// Splitting is on `'\n'` only: a trailing newline yields a final empty string
/// and a `'\r'` preceding the newline stays attached to its piece.
fn read_from_bytes(bytes: &[u8]) -> Vec<String> {
    String::from_utf8_lossy(bytes)
        .split('\n')
        .map(str::to_owned)
        .collect()
}