use crate::{
error::Result,
types::{LanguageCode, MelSpectrogram, Phoneme, SynthesisConfig},
AudioBuffer,
};
use async_trait::async_trait;
use std::collections::HashMap;
/// Grapheme-to-phoneme (G2P) conversion interface.
///
/// Implementors turn text into a sequence of [`Phoneme`]s and report which
/// languages they can handle.
#[async_trait]
pub trait G2p: Send + Sync {
    /// Converts `text` into phonemes.
    ///
    /// `lang` optionally names the language of `text`; how `None` is treated
    /// is left to the implementation.
    async fn to_phonemes(&self, text: &str, lang: Option<LanguageCode>) -> Result<Vec<Phoneme>>;

    /// Returns the languages this implementation reports as supported.
    fn supported_languages(&self) -> Vec<LanguageCode>;

    /// Returns descriptive metadata about this implementation.
    fn metadata(&self) -> G2pMetadata;

    /// Pre-processes `text` before conversion.
    ///
    /// The default implementation ignores `lang` and returns `text` unchanged
    /// as an owned `String`.
    async fn preprocess(&self, text: &str, lang: Option<LanguageCode>) -> Result<String> {
        // `lang` is intentionally unused by the default implementation.
        let _ = lang;
        Ok(text.to_owned())
    }

    /// Detects the language of `text`.
    ///
    /// The default implementation does not inspect `text`; it returns the
    /// first entry of [`G2p::supported_languages`].
    ///
    /// # Errors
    ///
    /// The default implementation returns a G2P error when
    /// [`G2p::supported_languages`] is empty.
    async fn detect_language(&self, text: &str) -> Result<LanguageCode> {
        // `text` is intentionally unused by the default implementation.
        let _ = text;
        match self.supported_languages().first() {
            Some(&first) => Ok(first),
            None => Err(crate::VoirsError::g2p_error("No supported languages")),
        }
    }
}
/// Descriptive information returned by [`G2p::metadata`].
#[derive(Debug, Clone)]
pub struct G2pMetadata {
    /// Name of the G2P implementation.
    pub name: String,

    /// Version string of the implementation.
    pub version: String,

    /// Free-form description.
    pub description: String,

    /// Languages listed as supported.
    pub supported_languages: Vec<LanguageCode>,

    /// Accuracy score keyed by language.
    ///
    /// NOTE(review): the scale of the score (e.g. 0.0-1.0) is not defined
    /// here; confirm against the implementations.
    pub accuracy_scores: HashMap<LanguageCode, f32>,
}
/// Acoustic model interface: turns phonemes into mel spectrograms.
#[async_trait]
pub trait AcousticModel: Send + Sync {
    /// Synthesizes a mel spectrogram from `phonemes`.
    ///
    /// `config` is optional; the behavior for `None` is left to the
    /// implementation.
    async fn synthesize(
        &self,
        phonemes: &[Phoneme],
        config: Option<&SynthesisConfig>,
    ) -> Result<MelSpectrogram>;

    /// Synthesizes one mel spectrogram per phoneme sequence in `inputs`.
    ///
    /// NOTE(review): `configs` presumably pairs one entry with each input;
    /// the pairing rule is not fixed by this trait, so confirm against the
    /// implementations.
    async fn synthesize_batch(
        &self,
        inputs: &[&[Phoneme]],
        configs: Option<&[SynthesisConfig]>,
    ) -> Result<Vec<MelSpectrogram>>;

    /// Returns descriptive metadata about this model.
    fn metadata(&self) -> AcousticModelMetadata;

    /// Reports whether this model supports `feature`.
    fn supports(&self, feature: AcousticModelFeature) -> bool;

    /// Selects the active speaker, or clears the selection with `None`.
    ///
    /// The default implementation ignores `speaker_id` and returns `Ok(())`.
    async fn set_speaker(&mut self, speaker_id: Option<u32>) -> Result<()> {
        // `speaker_id` is intentionally unused by the default implementation.
        let _ = speaker_id;
        Ok(())
    }
}
/// Descriptive information returned by [`AcousticModel::metadata`].
#[derive(Debug, Clone)]
pub struct AcousticModelMetadata {
    /// Name of the model.
    pub name: String,

    /// Version string of the model.
    pub version: String,

    /// Name of the model architecture.
    pub architecture: String,

    /// Languages listed as supported.
    pub supported_languages: Vec<LanguageCode>,

    /// Audio sample rate associated with the model (presumably in Hz).
    pub sample_rate: u32,

    /// Number of mel channels.
    pub mel_channels: u32,

    /// Whether the model is a multi-speaker model.
    pub is_multi_speaker: bool,

    /// Number of speakers, when reported.
    pub speaker_count: Option<u32>,
}
/// Capability flags that can be queried through [`AcousticModel::supports`].
///
/// `Hash` is derived alongside `Eq` so that features can be collected into a
/// `HashSet` or used as `HashMap` keys (for example, to describe the full
/// capability set of a model).
///
/// NOTE(review): both `StreamingInference` and `StreamingSynthesis` exist; the
/// intended distinction between them is not visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AcousticModelFeature {
    /// Multi-speaker capability.
    MultiSpeaker,
    /// Emotion control capability.
    EmotionControl,
    /// Prosody control capability.
    ProsodyControl,
    /// Streaming inference capability.
    StreamingInference,
    /// Streaming synthesis capability.
    StreamingSynthesis,
    /// Batch processing capability.
    BatchProcessing,
    /// Style transfer capability.
    StyleTransfer,
    /// GPU acceleration capability.
    GpuAcceleration,
    /// Voice cloning capability.
    VoiceCloning,
    /// Real-time inference capability.
    RealTimeInference,
}
/// Vocoder interface: turns mel spectrograms into audio.
#[async_trait]
pub trait Vocoder: Send + Sync {
    /// Converts a single mel spectrogram into an audio buffer.
    ///
    /// `config` is optional; the behavior for `None` is left to the
    /// implementation.
    async fn vocode(
        &self,
        mel: &MelSpectrogram,
        config: Option<&SynthesisConfig>,
    ) -> Result<AudioBuffer>;

    /// Converts a stream of mel spectrograms into a stream of audio buffers.
    ///
    /// The outer `Result` covers setting up the output stream; each item of
    /// the returned stream carries its own `Result`.
    async fn vocode_stream(
        &self,
        mel_stream: Box<dyn futures::Stream<Item = MelSpectrogram> + Send + Unpin>,
        config: Option<&SynthesisConfig>,
    ) -> Result<Box<dyn futures::Stream<Item = Result<AudioBuffer>> + Send + Unpin>>;

    /// Converts several mel spectrograms into audio buffers.
    ///
    /// NOTE(review): `configs` presumably pairs one entry with each element of
    /// `mels`; the pairing rule is not fixed by this trait.
    async fn vocode_batch(
        &self,
        mels: &[MelSpectrogram],
        configs: Option<&[SynthesisConfig]>,
    ) -> Result<Vec<AudioBuffer>>;

    /// Returns descriptive metadata about this vocoder.
    fn metadata(&self) -> VocoderMetadata;

    /// Reports whether this vocoder supports `feature`.
    fn supports(&self, feature: VocoderFeature) -> bool;
}
/// Descriptive information returned by [`Vocoder::metadata`].
#[derive(Debug, Clone)]
pub struct VocoderMetadata {
    /// Name of the vocoder.
    pub name: String,

    /// Version string of the vocoder.
    pub version: String,

    /// Name of the vocoder architecture.
    pub architecture: String,

    /// Audio sample rate associated with the vocoder (presumably in Hz).
    pub sample_rate: u32,

    /// Number of mel channels.
    pub mel_channels: u32,

    /// Latency figure in milliseconds.
    pub latency_ms: f32,

    /// Quality score.
    ///
    /// NOTE(review): the scale of this score is not defined here; confirm
    /// against the implementations.
    pub quality_score: f32,
}
/// Capability flags that can be queried through [`Vocoder::supports`].
///
/// `Hash` is derived alongside `Eq` so that features can be collected into a
/// `HashSet` or used as `HashMap` keys (for example, to describe the full
/// capability set of a vocoder).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VocoderFeature {
    /// Streaming inference capability.
    StreamingInference,
    /// Batch processing capability.
    BatchProcessing,
    /// GPU acceleration capability.
    GpuAcceleration,
    /// Multiple sample rate capability.
    MultiSampleRate,
    /// Enhancement filter capability.
    EnhancementFilters,
    /// Real-time processing capability.
    RealtimeProcessing,
}
/// Text processing interface applied to input text.
///
/// The exact rules behind each step are defined by the implementation; this
/// trait only fixes the signatures.
#[async_trait]
pub trait TextProcessor: Send + Sync {
    /// Processes `text` for the given language and returns the result.
    async fn process(&self, text: &str, lang: LanguageCode) -> Result<String>;

    /// Normalizes `text` and returns the result.
    fn normalize(&self, text: &str) -> Result<String>;

    /// Expands `text` for the given language and returns the result.
    ///
    /// NOTE(review): presumably expands abbreviations, numbers and similar
    /// tokens; confirm against the implementations.
    fn expand(&self, text: &str, lang: LanguageCode) -> Result<String>;

    /// Cleans `text` and returns the result.
    fn clean(&self, text: &str) -> Result<String>;
}
/// Audio processing interface: maps one audio buffer to another.
#[async_trait]
pub trait AudioProcessor: Send + Sync {
    /// Processes `audio` and returns a new buffer; the input is only borrowed.
    async fn process(&self, audio: &AudioBuffer) -> Result<AudioBuffer>;

    /// Returns descriptive metadata about this processor.
    fn metadata(&self) -> AudioProcessorMetadata;
}
/// Descriptive information returned by [`AudioProcessor::metadata`].
#[derive(Debug, Clone)]
pub struct AudioProcessorMetadata {
    /// Name of the processor.
    pub name: String,

    /// Version string of the processor.
    pub version: String,

    /// Free-form description.
    pub description: String,

    /// Processing time figure in milliseconds.
    pub processing_time_ms: f32,
}
/// Voice management interface: listing, lookup and download of voices.
#[async_trait]
pub trait VoiceManager: Send + Sync {
    /// Lists the voice configurations known to this manager.
    async fn list_voices(&self) -> Result<Vec<crate::types::VoiceConfig>>;

    /// Looks up the configuration for `voice_id`.
    ///
    /// NOTE(review): `Ok(None)` presumably means the voice is unknown; confirm
    /// against the implementations.
    async fn get_voice(&self, voice_id: &str) -> Result<Option<crate::types::VoiceConfig>>;

    /// Downloads the voice identified by `voice_id`.
    async fn download_voice(&self, voice_id: &str) -> Result<()>;

    /// Reports whether the voice identified by `voice_id` is available.
    fn is_voice_available(&self, voice_id: &str) -> bool;

    /// Returns the identifier of the default voice for `lang`, if there is one.
    fn default_voice_for_language(&self, lang: LanguageCode) -> Option<String>;
}
/// Cache interface storing type-erased values under string keys.
///
/// Values are passed as `Box<dyn Any + Send + Sync>`, so callers are expected
/// to downcast what they get back to the concrete type they stored.
#[async_trait]
pub trait ModelCache: Send + Sync {
    /// Fetches the value stored under `key`, if any.
    async fn get_any(&self, key: &str) -> Result<Option<Box<dyn std::any::Any + Send + Sync>>>;

    /// Stores `value` under `key`.
    async fn put_any(&self, key: &str, value: Box<dyn std::any::Any + Send + Sync>) -> Result<()>;

    /// Removes the entry stored under `key`.
    async fn remove(&self, key: &str) -> Result<()>;

    /// Removes all entries.
    async fn clear(&self) -> Result<()>;

    /// Returns a snapshot of the cache statistics.
    fn stats(&self) -> CacheStats;
}
/// Cache statistics returned by [`ModelCache::stats`].
///
/// `Default` yields all-zero statistics.
#[derive(Debug, Clone, Copy, Default, serde::Serialize, serde::Deserialize)]
pub struct CacheStats {
    /// Number of entries in the cache.
    pub total_entries: usize,

    /// Memory usage in bytes.
    pub memory_usage_bytes: usize,

    /// Cache hit rate.
    ///
    /// NOTE(review): presumably a fraction in 0.0-1.0; the scale is not
    /// defined here.
    pub hit_rate: f32,

    /// Cache miss rate.
    ///
    /// NOTE(review): presumably a fraction in 0.0-1.0; the scale is not
    /// defined here.
    pub miss_rate: f32,
}
/// Base interface shared by all plugins.
pub trait Plugin: Send + Sync {
    /// Returns the plugin's name.
    fn name(&self) -> &str;

    /// Returns the plugin's version string.
    fn version(&self) -> &str;

    /// Initializes the plugin with `_config`.
    ///
    /// The default implementation ignores the configuration and returns
    /// `Ok(())`.
    fn initialize(&self, _config: &crate::plugins::PluginConfig) -> Result<()> {
        Ok(())
    }

    /// Shuts the plugin down.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn shutdown(&self) -> Result<()> {
        Ok(())
    }
}
/// Plugin interface for audio effects with named `f32` parameters.
#[async_trait]
pub trait AudioEffectPlugin: Plugin {
    /// Applies the effect to `audio` and returns a new buffer; the input is
    /// only borrowed.
    async fn process(&self, audio: &AudioBuffer) -> Result<AudioBuffer>;

    /// Returns the effect's parameters as a name-to-value map.
    fn parameters(&self) -> HashMap<String, f32>;

    /// Sets the parameter called `name` to `value`.
    ///
    /// NOTE(review): presumably fails for unknown names or out-of-range
    /// values; the error conditions are defined by the implementation.
    fn set_parameter(&mut self, name: &str, value: f32) -> Result<()>;
}