use serde::Deserialize;
use std::sync::LazyLock;
/// Per-language configuration deserialized from a TOML document.
///
/// Every field is marked `#[serde(default)]`, so a key that is absent from
/// the TOML source deserializes to an empty `Vec`/`String` rather than
/// producing a parse error.
///
/// NOTE(review): fields suffixed `_re` are stored as plain `String`s here;
/// they look like regular-expression sources that a consumer compiles
/// elsewhere — confirm against the code that reads them.
#[derive(Debug, Clone, Deserialize)]
pub struct LangConfig {
/// List of phrases; presumably wording that signals intent — TODO confirm with the consumer.
#[serde(default)]
pub intent_phrases: Vec<String>,
/// List of verbs; presumably action verbs matched against text — TODO confirm.
#[serde(default)]
pub action_verbs: Vec<String>,
/// Pattern string; presumably matched at the start of a line — TODO confirm.
#[serde(default)]
pub line_start_re: String,
/// Pattern string; presumably matches announcements of upcoming work — TODO confirm.
#[serde(default)]
pub work_announcement_re: String,
/// List of phrases; presumably claims that work has been completed — TODO confirm.
#[serde(default)]
pub completion_claims: Vec<String>,
/// Pattern string; presumably matches gerund verb forms — TODO confirm.
#[serde(default)]
pub gerund_re: String,
/// Pattern string; presumably matches text ending in a colon — TODO confirm.
#[serde(default)]
pub trailing_colon_re: String,
/// Pattern string; presumably matches "now ..." imperative phrasing — TODO confirm.
#[serde(default)]
pub now_imperative_re: String,
/// Pattern string; presumably matches numbered step lists — TODO confirm.
#[serde(default)]
pub numbered_steps_re: String,
/// Pattern string; presumably matches standalone past-tense statements — TODO confirm.
#[serde(default)]
pub past_tense_standalone_re: String,
/// Pattern string; presumably matches file-system paths — TODO confirm.
#[serde(default)]
pub path_re: String,
/// Pattern string; presumably matches file extensions — TODO confirm.
#[serde(default)]
pub ext_re: String,
/// Pattern string; presumably matches backtick-quoted code spans — TODO confirm.
#[serde(default)]
pub backtick_code_re: String,
}
// Raw TOML text for each supported language, embedded into the binary at
// compile time from files that sit next to this source file.
const EN_TOML: &str = include_str!("en.toml");
const RU_TOML: &str = include_str!("ru.toml");
const ES_TOML: &str = include_str!("es.toml");
const PT_TOML: &str = include_str!("pt.toml");
const FR_TOML: &str = include_str!("fr.toml");

/// Deserializes one embedded TOML table into a [`LangConfig`].
///
/// # Panics
///
/// Panics with `failure_msg` when `source` is not valid TOML for
/// [`LangConfig`]. The sources are compiled into the binary, so a failure
/// here is a packaging bug rather than a runtime condition.
fn parse_embedded_lang(source: &str, failure_msg: &str) -> LangConfig {
    toml::from_str(source).expect(failure_msg)
}

/// English configuration, parsed on first access.
pub(crate) static LANG_EN: LazyLock<LangConfig> = LazyLock::new(|| {
    parse_embedded_lang(EN_TOML, "BUG: en.toml failed to parse at runtime")
});

/// Russian configuration, parsed on first access.
pub(crate) static LANG_RU: LazyLock<LangConfig> = LazyLock::new(|| {
    parse_embedded_lang(RU_TOML, "BUG: ru.toml failed to parse at runtime")
});

/// Spanish configuration, parsed on first access.
pub(crate) static LANG_ES: LazyLock<LangConfig> = LazyLock::new(|| {
    parse_embedded_lang(ES_TOML, "BUG: es.toml failed to parse at runtime")
});

/// Portuguese configuration, parsed on first access.
pub(crate) static LANG_PT: LazyLock<LangConfig> = LazyLock::new(|| {
    parse_embedded_lang(PT_TOML, "BUG: pt.toml failed to parse at runtime")
});

/// French configuration, parsed on first access.
pub(crate) static LANG_FR: LazyLock<LangConfig> = LazyLock::new(|| {
    parse_embedded_lang(FR_TOML, "BUG: fr.toml failed to parse at runtime")
});
/// Picks the language configuration that best matches `text`, using cheap
/// character-level heuristics.
///
/// Script statistics are gathered from the first 500 characters only; the
/// marker-character lookups scan the whole input.
///
/// Decision order:
/// 1. No alphabetic character in the sample: English.
/// 2. More than 20% of the sampled letters are Cyrillic (U+0400..=U+04FF):
///    Russian.
/// 3. The sample holds an accented Latin letter (U+00C0..=U+024F) and the
///    text contains `ã`, `õ` or `ç` (either case): Portuguese.
/// 4. The text contains `¿` or `¡`, or the sample holds an accented Latin
///    letter and the text contains `ñ`/`Ñ`: Spanish.
/// 5. The sample holds an accented Latin letter and the text contains one of
///    the French accent markers: French.
/// 6. Otherwise: English.
///
/// Returns a reference to one of the lazily initialised static
/// configurations; the first access to a given language triggers its parse.
pub fn detect_language(text: &str) -> &'static LangConfig {
    const SAMPLE_CHARS: usize = 500;
    const PORTUGUESE_MARKERS: &[char] = &['ã', 'õ', 'ç', 'Ã', 'Õ', 'Ç'];
    const SPANISH_LETTERS: &[char] = &['ñ', 'Ñ'];
    const SPANISH_PUNCTUATION: &[char] = &['¿', '¡'];
    const FRENCH_MARKERS: &[char] = &[
        'à', 'â', 'é', 'è', 'ê', 'ë', 'î', 'ï', 'ô', 'û', 'ù', 'ü', 'ÿ',
    ];

    let mut cyrillic = 0u32;
    let mut latin_accent = 0u32;
    let mut total_alpha = 0u32;
    for ch in text
        .chars()
        .take(SAMPLE_CHARS)
        .filter(|c| c.is_alphabetic())
    {
        total_alpha += 1;
        match ch {
            '\u{0400}'..='\u{04FF}' => cyrillic += 1,
            '\u{00C0}'..='\u{024F}' => latin_accent += 1,
            _ => {}
        }
    }

    if total_alpha == 0 {
        return &LANG_EN;
    }
    // Strictly more than one Cyrillic letter in five.
    if cyrillic * 5 > total_alpha {
        return &LANG_RU;
    }

    let has_accented_latin = latin_accent > 0;
    if has_accented_latin && text.contains(PORTUGUESE_MARKERS) {
        return &LANG_PT;
    }
    // `¿` and `¡` are punctuation, not letters, so they never contribute to
    // `latin_accent`. They must be checked outside the accent gate, otherwise
    // Spanish text without accented letters ("¿Como estas?") is classified
    // as English.
    if text.contains(SPANISH_PUNCTUATION)
        || (has_accented_latin && text.contains(SPANISH_LETTERS))
    {
        return &LANG_ES;
    }
    if has_accented_latin && text.contains(FRENCH_MARKERS) {
        return &LANG_FR;
    }
    &LANG_EN
}
/// Returns every built-in language configuration in a fixed order:
/// English, Russian, Spanish, Portuguese, French.
///
/// Taking the references forces each lazily initialised static to be parsed
/// if it has not been accessed before.
pub fn all_langs() -> [&'static LangConfig; 5] {
    let english: &'static LangConfig = &LANG_EN;
    let russian: &'static LangConfig = &LANG_RU;
    let spanish: &'static LangConfig = &LANG_ES;
    let portuguese: &'static LangConfig = &LANG_PT;
    let french: &'static LangConfig = &LANG_FR;
    [english, russian, spanish, portuguese, french]
}