mod backend;
mod config;
mod model_manager;
pub use backend::PaddleOcrBackend;
pub use config::{PaddleLanguage, PaddleOcrConfig};
pub use model_manager::{
CacheStats, ModelManager, ModelManifestEntry, ModelPaths, RecModelPaths, ResolvedRecModel, SharedModelPaths,
};
/// Language codes recognised by this module.
///
/// Every entry is also accepted verbatim by [`map_language_code`] and has an
/// explicit mapping in [`language_to_script_family`].
pub const SUPPORTED_LANGUAGES: &[&str] = &[
    "ch",
    "en",
    "french",
    "german",
    "korean",
    "japan",
    "chinese_cht",
    "latin",
    "cyrillic",
    "thai",
    "greek",
    "arabic",
    "devanagari",
    "tamil",
    "telugu",
];
/// Returns `true` when `lang` is exactly one of the entries in
/// [`SUPPORTED_LANGUAGES`].
///
/// The comparison is an exact, case-sensitive string match; aliases such as
/// ISO codes are not resolved here.
pub fn is_language_supported(lang: &str) -> bool {
    SUPPORTED_LANGUAGES.iter().any(|&code| code == lang)
}
/// Maps a language code to the name of its script family.
///
/// Several codes share one family (for example `"ch"`, `"japan"` and
/// `"chinese_cht"` all yield `"chinese"`). Any code without an explicit entry
/// falls back to `"english"`.
pub fn language_to_script_family(paddle_lang: &str) -> &'static str {
    // (language code, script family) pairs; lookup is an exact string match.
    const SCRIPT_FAMILIES: &[(&str, &str)] = &[
        ("en", "english"),
        ("ch", "chinese"),
        ("japan", "chinese"),
        ("chinese_cht", "chinese"),
        ("korean", "korean"),
        ("french", "latin"),
        ("german", "latin"),
        ("latin", "latin"),
        ("cyrillic", "eslav"),
        ("thai", "thai"),
        ("greek", "greek"),
        ("arabic", "arabic"),
        ("devanagari", "devanagari"),
        ("tamil", "tamil"),
        ("telugu", "telugu"),
    ];

    SCRIPT_FAMILIES
        .iter()
        .find(|entry| entry.0 == paddle_lang)
        .map_or("english", |entry| entry.1)
}
/// Translates a language identifier (short code, three-letter code, or
/// English language name) into the language code used by the rest of this
/// module.
///
/// Matching is exact and case-sensitive. Returns `None` when the identifier
/// is not listed in any alias group.
pub fn map_language_code(kreuzberg_code: &str) -> Option<&'static str> {
    // Each entry pairs a target language code with every identifier that
    // resolves to it. No identifier appears in more than one group.
    const ALIAS_GROUPS: &[(&str, &[&str])] = &[
        ("ch", &["ch", "chi_sim", "zho", "zh", "chinese"]),
        ("en", &["en", "eng", "english"]),
        ("french", &["fr", "fra", "french"]),
        ("german", &["de", "deu", "german"]),
        ("korean", &["ko", "kor", "korean"]),
        ("japan", &["ja", "jpn", "japanese", "japan"]),
        ("chinese_cht", &["chi_tra", "zh_tw", "zh_hant", "chinese_cht"]),
        (
            "cyrillic",
            &[
                "ru", "rus", "russian", "uk", "ukr", "ukrainian", "be", "bel", "belarusian",
                "cyrillic",
            ],
        ),
        ("thai", &["th", "tha", "thai"]),
        ("greek", &["el", "ell", "greek"]),
        (
            "arabic",
            &["ar", "ara", "arabic", "fa", "fas", "persian", "ur", "urd", "urdu"],
        ),
        (
            "devanagari",
            &[
                "hi", "hin", "hindi", "mr", "mar", "marathi", "sa", "san", "sanskrit", "ne",
                "nep", "nepali", "devanagari",
            ],
        ),
        ("tamil", &["ta", "tam", "tamil"]),
        ("telugu", &["te", "tel", "telugu"]),
        (
            "latin",
            &[
                "latin", "es", "spa", "spanish", "it", "ita", "italian", "pt", "por",
                "portuguese", "nl", "nld", "dutch", "pl", "pol", "polish", "sv", "swe",
                "swedish", "da", "dan", "danish", "no", "nor", "norwegian", "fi", "fin",
                "finnish", "cs", "ces", "czech", "sk", "slk", "slovak", "hr", "hrv", "croatian",
                "hu", "hun", "hungarian", "ro", "ron", "romanian", "tr", "tur", "turkish", "id",
                "ind", "indonesian", "ms", "msa", "malay", "vi", "vie", "vietnamese",
            ],
        ),
    ];

    ALIAS_GROUPS
        .iter()
        .find(|group| group.1.iter().any(|&alias| alias == kreuzberg_code))
        .map(|group| group.0)
}