use ahash::AHashMap;
use std::sync::OnceLock;
use super::backends;
static LANGUAGE_REGISTRY: OnceLock<LanguageRegistry> = OnceLock::new();
#[derive(Debug, Clone)]
pub struct LanguageRegistry {
backends: AHashMap<String, Vec<String>>,
}
impl LanguageRegistry {
pub fn new() -> Self {
let mut registry = Self {
backends: AHashMap::new(),
};
registry
.backends
.insert("easyocr".to_string(), backends::easyocr::languages());
registry
.backends
.insert("paddleocr".to_string(), backends::paddleocr::languages());
registry
.backends
.insert("tesseract".to_string(), backends::tesseract::languages());
registry
}
pub fn global() -> &'static Self {
LANGUAGE_REGISTRY.get_or_init(Self::new)
}
pub fn get_supported_languages(&self, backend: &str) -> Option<&[String]> {
self.backends.get(backend).map(|v| v.as_slice())
}
pub fn is_language_supported(&self, backend: &str, language: &str) -> bool {
self.backends
.get(backend)
.map(|langs| langs.contains(&language.to_string()))
.unwrap_or(false)
}
pub fn get_backends(&self) -> Vec<String> {
let mut backends: Vec<_> = self.backends.keys().cloned().collect();
backends.sort();
backends
}
pub fn get_language_count(&self, backend: &str) -> usize {
self.backends.get(backend).map(|langs| langs.len()).unwrap_or(0)
}
}
impl Default for LanguageRegistry {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_registry_creation() {
let registry = LanguageRegistry::new();
assert!(!registry.backends.is_empty());
}
#[test]
fn test_easyocr_languages() {
let registry = LanguageRegistry::new();
let languages = registry
.get_supported_languages("easyocr")
.expect("EasyOCR backend not found");
assert_eq!(languages.len(), 83);
assert!(languages.contains(&"en".to_string()));
assert!(languages.contains(&"fr".to_string()));
assert!(languages.contains(&"de".to_string()));
assert!(languages.contains(&"ch_sim".to_string()));
assert!(languages.contains(&"ch_tra".to_string()));
}
#[test]
fn test_paddleocr_languages() {
let registry = LanguageRegistry::new();
let languages = registry
.get_supported_languages("paddleocr")
.expect("PaddleOCR backend not found");
assert_eq!(languages.len(), 14);
assert!(languages.contains(&"en".to_string()));
assert!(languages.contains(&"ch".to_string()));
assert!(languages.contains(&"french".to_string()));
assert!(languages.contains(&"german".to_string()));
}
#[test]
fn test_tesseract_languages() {
let registry = LanguageRegistry::new();
let languages = registry
.get_supported_languages("tesseract")
.expect("Tesseract backend not found");
assert!(languages.len() >= 100);
assert!(languages.contains(&"eng".to_string()));
assert!(languages.contains(&"fra".to_string()));
assert!(languages.contains(&"deu".to_string()));
}
#[test]
fn test_get_unsupported_backend() {
let registry = LanguageRegistry::new();
assert_eq!(registry.get_supported_languages("nonexistent"), None);
}
#[test]
fn test_is_language_supported() {
let registry = LanguageRegistry::new();
assert!(registry.is_language_supported("easyocr", "en"));
assert!(registry.is_language_supported("easyocr", "fr"));
assert!(!registry.is_language_supported("easyocr", "invalid"));
assert!(registry.is_language_supported("paddleocr", "en"));
assert!(!registry.is_language_supported("paddleocr", "invalid"));
assert!(registry.is_language_supported("tesseract", "eng"));
assert!(!registry.is_language_supported("tesseract", "invalid"));
}
#[test]
fn test_get_backends() {
let registry = LanguageRegistry::new();
let backends = registry.get_backends();
assert_eq!(backends.len(), 3);
assert!(backends.contains(&"easyocr".to_string()));
assert!(backends.contains(&"paddleocr".to_string()));
assert!(backends.contains(&"tesseract".to_string()));
}
#[test]
fn test_get_language_count() {
let registry = LanguageRegistry::new();
assert_eq!(registry.get_language_count("easyocr"), 83);
assert_eq!(registry.get_language_count("paddleocr"), 14);
assert!(registry.get_language_count("tesseract") >= 100);
assert_eq!(registry.get_language_count("nonexistent"), 0);
}
#[test]
fn test_default_implementation() {
let registry1 = LanguageRegistry::default();
let registry2 = LanguageRegistry::new();
assert_eq!(
registry1.get_language_count("easyocr"),
registry2.get_language_count("easyocr")
);
}
#[test]
fn test_global_instance() {
let global1 = LanguageRegistry::global();
let global2 = LanguageRegistry::global();
assert_eq!(
global1.get_language_count("easyocr"),
global2.get_language_count("easyocr")
);
}
#[test]
fn test_easyocr_specific_languages() {
let registry = LanguageRegistry::new();
let languages = registry.get_supported_languages("easyocr").unwrap();
assert!(languages.contains(&"abq".to_string()));
assert!(languages.contains(&"bho".to_string()));
assert!(languages.contains(&"gom".to_string()));
assert!(languages.contains(&"rs_cyrillic".to_string()));
assert!(languages.contains(&"rs_latin".to_string()));
}
#[test]
fn test_paddleocr_specific_languages() {
let registry = LanguageRegistry::new();
let languages = registry.get_supported_languages("paddleocr").unwrap();
assert!(languages.contains(&"ch".to_string()));
assert!(languages.contains(&"chinese_cht".to_string()));
assert!(languages.contains(&"devanagari".to_string()));
assert!(languages.contains(&"arabic".to_string()));
}
#[test]
fn test_tesseract_specific_languages() {
let registry = LanguageRegistry::new();
let languages = registry.get_supported_languages("tesseract").unwrap();
assert!(languages.contains(&"chi_sim".to_string()));
assert!(languages.contains(&"chi_tra".to_string()));
assert!(languages.contains(&"ita_old".to_string()));
assert!(languages.contains(&"spa_old".to_string()));
}
}