use std::collections::HashMap;
use crate::error::G2pError;
/// Map from a `String` key to a list of `i64` values.
///
/// NOTE(review): judging by the name, keys are phoneme symbols and values are
/// the ids assigned to them; confirm against the code that builds this map.
pub type PhonemeIdMap = HashMap<String, Vec<i64>>;
/// Three integer prosody values, optionally produced per entry by
/// `Phonemizer::phonemize_with_prosody`.
///
/// NOTE(review): the linguistic meaning of `a1`/`a2`/`a3` is not defined in
/// this file; confirm against the phonemizer implementations.
#[derive(Debug, Clone, Copy)]
pub struct ProsodyInfo {
/// First prosody value.
pub a1: i32,
/// Second prosody value.
pub a2: i32,
/// Third prosody value.
pub a3: i32,
}
/// Fixed-size array of three `i32` prosody values.
///
/// NOTE(review): presumably the array form of `ProsodyInfo`'s `a1`, `a2`, `a3`
/// in that order; confirm against callers.
pub type ProsodyFeature = [i32; 3];
/// Maximum input length accepted by `Phonemizer::validate_input`; longer input
/// is rejected with an error.
const MAX_INPUT_LENGTH: usize = 10_000;
/// Text-to-phoneme backend for one language.
///
/// Implementors must be `Send + Sync` so they can be stored as
/// `Box<dyn Phonemizer>` and shared across threads.
pub trait Phonemizer: Send + Sync {
    /// Converts `text` into phonemes plus optional prosody information.
    ///
    /// Returns a pair of vectors: the phoneme strings and, separately,
    /// `Option<ProsodyInfo>` entries.
    /// NOTE(review): the two vectors are presumably index-aligned (one prosody
    /// slot per phoneme); the trait itself does not enforce this.
    ///
    /// # Errors
    ///
    /// Returns a [`G2pError`] when the implementation cannot phonemize `text`.
    fn phonemize_with_prosody(
        &self,
        text: &str,
    ) -> Result<(Vec<String>, Vec<Option<ProsodyInfo>>), G2pError>;

    /// Returns the language code this phonemizer reports for itself.
    fn language_code(&self) -> &str;

    /// Returns the primary language of `text`.
    ///
    /// The default implementation ignores `text` and returns
    /// [`Phonemizer::language_code`]; implementors may override it.
    fn detect_primary_language(&self, _text: &str) -> &str {
        self.language_code()
    }

    /// Checks the length of `text` and strips control characters from it.
    ///
    /// The limit is `MAX_INPUT_LENGTH` Unicode scalar values (`char`s), which
    /// matches the unit reported in the error message. On success the returned
    /// `String` is `text` with every control character removed except `'\n'`,
    /// `'\t'` and `'\r'`.
    ///
    /// # Errors
    ///
    /// Returns `G2pError::Phonemize` when `text` has more than
    /// `MAX_INPUT_LENGTH` characters.
    fn validate_input(&self, text: &str) -> Result<String, G2pError> {
        // The UTF-8 byte length is an upper bound on the character count, so the
        // O(n) character count only runs for inputs that could exceed the limit.
        // Comparing bytes directly would wrongly reject multi-byte text that is
        // within the character limit.
        if text.len() > MAX_INPUT_LENGTH {
            let char_count = text.chars().count();
            if char_count > MAX_INPUT_LENGTH {
                return Err(G2pError::Phonemize(format!(
                    "input too long: {} chars (max {})",
                    char_count, MAX_INPUT_LENGTH
                )));
            }
        }

        // Keep ordinary whitespace controls; drop every other control character.
        let sanitized: String = text
            .chars()
            .filter(|c| !c.is_control() || matches!(*c, '\n' | '\t' | '\r'))
            .collect();
        Ok(sanitized)
    }
}
/// Collection of boxed `Phonemizer` implementations keyed by a language-code
/// string.
pub struct PhonemizerRegistry {
/// Registered phonemizers, keyed by the code passed to `register`.
registry: HashMap<String, Box<dyn Phonemizer>>,
}
impl PhonemizerRegistry {
    /// Creates a registry with no phonemizers registered.
    pub fn new() -> Self {
        Self {
            registry: HashMap::new(),
        }
    }

    /// Registers `phonemizer` under `lang_code`.
    ///
    /// If a phonemizer is already registered under the same code, it is
    /// replaced and the previous one is dropped.
    pub fn register(&mut self, lang_code: &str, phonemizer: Box<dyn Phonemizer>) {
        self.registry.insert(lang_code.to_string(), phonemizer);
    }

    /// Returns the phonemizer registered under `lang_code`, or `None` if there
    /// is no entry for that exact code.
    pub fn get(&self, lang_code: &str) -> Option<&dyn Phonemizer> {
        self.registry.get(lang_code).map(|boxed| boxed.as_ref())
    }

    /// Returns every registered language code, sorted in ascending order.
    ///
    /// `HashMap` iteration order is unspecified and can differ between runs, so
    /// the codes are sorted to give callers a stable listing.
    pub fn available_languages(&self) -> Vec<&str> {
        let mut codes: Vec<&str> = self.registry.keys().map(String::as_str).collect();
        codes.sort_unstable();
        codes
    }
}
impl Default for PhonemizerRegistry {
fn default() -> Self {
Self::new()
}
}