use crate::types::{
Gender, LanguageCode, ModelConfig, QualityLevel, SpeakingStyle, VoiceCharacteristics,
VoiceConfig,
};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt;
/// A voice configuration bundled with information derived from it.
///
/// `VoiceInfo::from_config` computes `metrics`, `compatibility` and `model_info` from the
/// configuration and leaves `usage_stats` unset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoiceInfo {
    /// The voice configuration the derived fields were computed from.
    pub config: VoiceConfig,
    /// Heuristic scores computed from `config`.
    pub metrics: VoiceMetrics,
    /// Platform and system-requirement information computed from `config`.
    pub compatibility: VoiceCompatibility,
    /// Details about the model files referenced by `config.model_config`.
    pub model_info: ModelInfo,
    /// Optional usage statistics; `from_config` initializes this to `None`.
    pub usage_stats: Option<VoiceUsageStats>,
}
impl VoiceInfo {
    /// Builds a `VoiceInfo` from `config`, deriving metrics, compatibility and model details.
    ///
    /// `usage_stats` starts out as `None`.
    pub fn from_config(config: VoiceConfig) -> Self {
        Self {
            metrics: VoiceMetrics::from_voice(&config),
            compatibility: VoiceCompatibility::from_voice(&config),
            model_info: ModelInfo::from_config(&config.model_config),
            usage_stats: None,
            // Listed last so every borrow above has ended before ownership moves in.
            config,
        }
    }

    /// Returns the voice identifier from the configuration.
    pub fn id(&self) -> &str {
        self.config.id.as_str()
    }

    /// Returns the voice display name from the configuration.
    pub fn name(&self) -> &str {
        self.config.name.as_str()
    }

    /// Returns the language code of the voice.
    pub fn language(&self) -> LanguageCode {
        self.config.language
    }

    /// Returns the voice characteristics stored in the configuration.
    pub fn characteristics(&self) -> &VoiceCharacteristics {
        &self.config.characteristics
    }

    /// Reports whether the voice provides `feature`.
    ///
    /// `LowMemory` means a minimum memory requirement of at most 512 MB; `HighQuality` means a
    /// quality level of `High` or `Ultra`.
    pub fn supports_feature(&self, feature: VoiceFeature) -> bool {
        let traits = &self.config.characteristics;
        let device = &self.config.model_config.device_requirements;
        match feature {
            VoiceFeature::EmotionSupport => traits.emotion_support,
            VoiceFeature::GpuAcceleration => device.gpu_support,
            VoiceFeature::LowMemory => device.min_memory_mb <= 512,
            VoiceFeature::HighQuality => {
                matches!(traits.quality, QualityLevel::High | QualityLevel::Ultra)
            }
        }
    }

    /// Produces a compact, owned summary of the voice.
    pub fn summary(&self) -> VoiceSummary {
        let cfg = &self.config;
        let traits = &cfg.characteristics;
        let device = &cfg.model_config.device_requirements;
        VoiceSummary {
            id: cfg.id.clone(),
            name: cfg.name.clone(),
            language: cfg.language,
            gender: traits.gender,
            style: traits.style,
            quality: traits.quality,
            emotion_support: traits.emotion_support,
            memory_requirement: device.min_memory_mb,
            gpu_support: device.gpu_support,
        }
    }

    /// Serializes this value to pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json` error if serialization fails.
    pub fn to_json(&self) -> Result<String, serde_json::Error> {
        serde_json::to_string_pretty(self)
    }

    /// Parses a `VoiceInfo` from a JSON string.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json` error if `json` cannot be deserialized into a `VoiceInfo`.
    pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
        serde_json::from_str(json)
    }
}
/// Heuristic scores describing a voice, as computed by `VoiceMetrics::from_voice`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoiceMetrics {
    /// Score mapped from the quality level: 0.25 (Low), 0.5 (Medium), 0.75 (High), 1.0 (Ultra).
    pub quality_score: f32,
    /// Score based on emotion support and speaking style, capped at 1.0.
    pub naturalness_score: f32,
    /// Score based on GPU support and the minimum memory requirement, capped at 1.0.
    pub performance_score: f32,
    /// 1.0 for a 0 MB requirement, falling linearly to 0.0 at 2048 MB and above.
    pub memory_efficiency: f32,
    /// Arithmetic mean of the four scores above.
    pub overall_rating: f32,
    /// Coarse complexity class of the voice.
    pub complexity: VoiceComplexity,
}
impl VoiceMetrics {
    /// Computes all metrics for `voice`.
    ///
    /// `overall_rating` is the plain average of the quality, naturalness, performance and
    /// memory-efficiency scores.
    pub fn from_voice(voice: &VoiceConfig) -> Self {
        let traits = &voice.characteristics;
        let model = &voice.model_config;

        let quality_score = Self::compute_quality_score(traits);
        let naturalness_score = Self::compute_naturalness_score(traits);
        let performance_score = Self::compute_performance_score(model);
        let memory_efficiency = Self::compute_memory_efficiency(model);
        let score_sum = quality_score + naturalness_score + performance_score + memory_efficiency;

        Self {
            quality_score,
            naturalness_score,
            performance_score,
            memory_efficiency,
            overall_rating: score_sum / 4.0,
            complexity: Self::determine_complexity(voice),
        }
    }

    /// Maps the quality level onto a score in quarter steps from 0.25 to 1.0.
    fn compute_quality_score(characteristics: &VoiceCharacteristics) -> f32 {
        match characteristics.quality {
            QualityLevel::Low => 0.25,
            QualityLevel::Medium => 0.5,
            QualityLevel::High => 0.75,
            QualityLevel::Ultra => 1.0,
        }
    }

    /// Starts from 0.6, adds a bonus for emotion support and for the speaking style, and caps
    /// the result at 1.0.
    fn compute_naturalness_score(characteristics: &VoiceCharacteristics) -> f32 {
        let emotion_bonus: f32 = if characteristics.emotion_support {
            0.2
        } else {
            0.0
        };
        let style_bonus: f32 = match characteristics.style {
            SpeakingStyle::Conversational => 0.2,
            SpeakingStyle::Neutral | SpeakingStyle::Calm => 0.15,
            SpeakingStyle::News | SpeakingStyle::Formal => 0.05,
            // Any other style earns no bonus.
            _ => 0.0,
        };
        (0.6 + emotion_bonus + style_bonus).min(1.0)
    }

    /// Multiplies a GPU-dependent base score by a factor that shrinks as the memory
    /// requirement grows.
    fn compute_performance_score(model_config: &ModelConfig) -> f32 {
        let device = &model_config.device_requirements;
        let base: f32 = if device.gpu_support { 0.8 } else { 0.5 };
        let memory_factor: f32 = match device.min_memory_mb {
            0..=512 => 1.0,
            513..=1024 => 0.8,
            _ => 0.6,
        };
        (base * memory_factor).min(1.0)
    }

    /// Returns the unused fraction of a 2048 MB budget; requirements above the budget score 0.
    fn compute_memory_efficiency(model_config: &ModelConfig) -> f32 {
        let required_mb = model_config.device_requirements.min_memory_mb as f32;
        let capped_mb = required_mb.min(2048.0);
        (2048.0 - capped_mb) / 2048.0
    }

    /// Classifies the voice from its memory requirement, emotion support and quality level.
    fn determine_complexity(voice: &VoiceConfig) -> VoiceComplexity {
        let mb = voice.model_config.device_requirements.min_memory_mb;
        let emotional = voice.characteristics.emotion_support;
        let premium = matches!(
            voice.characteristics.quality,
            QualityLevel::High | QualityLevel::Ultra
        );

        if mb > 1536 || (mb > 768 && emotional && premium) {
            VoiceComplexity::High
        } else if mb > 512 && (premium || emotional) {
            VoiceComplexity::Medium
        } else {
            VoiceComplexity::Low
        }
    }
}
/// Coarse complexity class of a voice.
///
/// `VoiceMetrics` assigns it from the voice's minimum memory requirement, emotion support and
/// quality level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum VoiceComplexity {
    /// Lowest complexity class; the fallback when no other rule matches.
    Low,
    /// Intermediate complexity class.
    Medium,
    /// Highest complexity class.
    High,
}
impl fmt::Display for VoiceComplexity {
    /// Writes the variant name (`Low`, `Medium` or `High`) verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Low => "Low",
            Self::Medium => "Medium",
            Self::High => "High",
        };
        f.write_str(label)
    }
}
/// Platform, hardware and language compatibility information for a voice.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoiceCompatibility {
    /// Platform names; `from_voice` always lists `linux`, `macos` and `windows`.
    pub platforms: Vec<String>,
    /// Compute capabilities copied from the voice's device requirements.
    pub compute_capabilities: Vec<String>,
    /// Languages the voice can be used with; `from_voice` lists only the voice's own language.
    pub compatible_languages: Vec<LanguageCode>,
    /// Minimum system requirements for the voice.
    pub min_requirements: SystemRequirements,
    /// Recommended system requirements for the voice.
    pub recommended_requirements: SystemRequirements,
}
impl VoiceCompatibility {
    /// Derives compatibility information from a voice configuration.
    ///
    /// * `platforms` is always `linux`, `macos`, `windows`.
    /// * `compute_capabilities` is copied from the voice's device requirements.
    /// * `compatible_languages` contains only the voice's own language.
    /// * Minimum requirements use the configured minimum memory, 1024 MB of storage and one
    ///   CPU core; recommended requirements use twice the memory, 2048 MB of storage and four
    ///   CPU cores.
    /// * GPU memory (512 MB minimum, 2048 MB recommended) is reported only when the voice
    ///   declares GPU support.
    pub fn from_voice(voice: &VoiceConfig) -> Self {
        let device = &voice.model_config.device_requirements;
        // GPU memory figures only apply to voices that declare GPU support.
        let gpu_memory = |mb: u32| if device.gpu_support { Some(mb) } else { None };

        let min_requirements = SystemRequirements {
            memory_mb: device.min_memory_mb,
            storage_mb: 1024,
            cpu_cores: 1,
            gpu_memory_mb: gpu_memory(512),
        };
        let recommended_requirements = SystemRequirements {
            // Saturate instead of overflowing: a plain `* 2` panics in debug builds and wraps
            // to a nonsensically small value in release builds for very large requirements.
            memory_mb: device.min_memory_mb.saturating_mul(2),
            storage_mb: 2048,
            cpu_cores: 4,
            gpu_memory_mb: gpu_memory(2048),
        };

        Self {
            platforms: ["linux", "macos", "windows"]
                .iter()
                .map(|&platform| platform.to_string())
                .collect(),
            compute_capabilities: device.compute_capabilities.clone(),
            compatible_languages: vec![voice.language],
            min_requirements,
            recommended_requirements,
        }
    }
}
/// A set of system resources needed to run a voice.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemRequirements {
    /// System memory in megabytes.
    pub memory_mb: u32,
    /// Storage space in megabytes.
    pub storage_mb: u32,
    /// Number of CPU cores.
    pub cpu_cores: u32,
    /// GPU memory in megabytes; `VoiceCompatibility::from_voice` sets this to `None` for voices
    /// without GPU support.
    pub gpu_memory_mb: Option<u32>,
}
/// Information about the model files that make up a voice.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInfo {
    /// The G2P model file, when the configuration names one.
    pub g2p_model: Option<ModelFileInfo>,
    /// The acoustic model file.
    pub acoustic_model: ModelFileInfo,
    /// The vocoder model file.
    pub vocoder_model: ModelFileInfo,
    /// Sum of the estimated sizes of the model files above, in megabytes.
    pub total_size_mb: u32,
    /// `Debug` rendering of the configured model format.
    pub format: String,
}
impl ModelInfo {
    /// Collects file information for every model named in `config`.
    ///
    /// `total_size_mb` adds up the estimated sizes of the acoustic and vocoder models plus the
    /// G2P model when one is configured. `format` is the `Debug` rendering of `config.format`.
    pub fn from_config(config: &ModelConfig) -> Self {
        let g2p_model = config.g2p_model.as_deref().map(ModelFileInfo::from_path);
        let acoustic_model = ModelFileInfo::from_path(&config.acoustic_model);
        let vocoder_model = ModelFileInfo::from_path(&config.vocoder_model);

        // A missing G2P model contributes nothing to the total.
        let g2p_size_mb = g2p_model.as_ref().map_or(0, |info| info.estimated_size_mb);
        let total_size_mb =
            g2p_size_mb + acoustic_model.estimated_size_mb + vocoder_model.estimated_size_mb;

        Self {
            g2p_model,
            acoustic_model,
            vocoder_model,
            total_size_mb,
            format: format!("{:?}", config.format),
        }
    }
}
/// Information about a single model file, inferred from its path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelFileInfo {
    /// The path as given in the configuration.
    pub path: String,
    /// Size estimate in megabytes, chosen from the model type rather than read from the file.
    pub estimated_size_mb: u32,
    /// One of `G2P`, `Acoustic`, `Vocoder` or `Unknown`.
    pub model_type: String,
}
impl ModelFileInfo {
    /// Infers the model type and an estimated size from `path`.
    ///
    /// The type is decided by a case-sensitive keyword search (`g2p`, `acoustic`, `vocoder`,
    /// checked in that order). The file name is examined first so that a keyword in a parent
    /// directory (for example `g2p_pack/acoustic.bin`) cannot override what the file itself
    /// is called; if the file name contains no keyword, the whole path is searched. Paths with
    /// no keyword at all are reported as `Unknown`.
    ///
    /// Estimated sizes are fixed per type: 50 MB (G2P), 200 MB (Acoustic), 100 MB (Vocoder)
    /// and 150 MB (Unknown). The file itself is never opened.
    pub fn from_path(path: &str) -> Self {
        // Maps a name to `(model_type, estimated_size_mb)` when it contains a known keyword.
        fn classify(name: &str) -> Option<(&'static str, u32)> {
            if name.contains("g2p") {
                Some(("G2P", 50))
            } else if name.contains("acoustic") {
                Some(("Acoustic", 200))
            } else if name.contains("vocoder") {
                Some(("Vocoder", 100))
            } else {
                None
            }
        }

        let file_name = std::path::Path::new(path)
            .file_name()
            .and_then(|name| name.to_str());
        let (model_type, estimated_size_mb) = file_name
            .and_then(classify)
            // Fall back to the directory components, e.g. `vocoder/model.bin`.
            .or_else(|| classify(path))
            .unwrap_or(("Unknown", 150));

        Self {
            path: path.to_string(),
            estimated_size_mb,
            model_type: model_type.to_string(),
        }
    }
}
/// Usage statistics recorded for a voice.
///
/// NOTE(review): nothing in this file populates these fields, so the units and update rules
/// below are inferred from the field names and should be confirmed against the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoiceUsageStats {
    /// Number of times the voice has been used.
    pub usage_count: u64,
    /// Accumulated synthesis time (presumably seconds; TODO confirm the unit).
    pub total_synthesis_time: f64,
    /// Average synthesis time per use (presumably the same unit as `total_synthesis_time`).
    pub avg_synthesis_time: f64,
    /// When the voice was last used, if that is known.
    pub last_used: Option<std::time::SystemTime>,
    /// Text lengths seen with this voice (presumably the most frequent ones; TODO confirm).
    pub common_text_lengths: Vec<usize>,
    /// Runtime performance figures for the voice.
    pub performance_metrics: PerformanceMetrics,
}
/// Runtime performance figures for a voice.
///
/// NOTE(review): nothing in this file populates these fields; the descriptions are inferred
/// from the field names and should be confirmed against the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Average RTF (presumably real-time factor; TODO confirm).
    pub avg_rtf: f32,
    /// Peak memory use in megabytes.
    pub peak_memory_mb: u32,
    /// Average memory use in megabytes.
    pub avg_memory_mb: u32,
    /// Error rate (presumably a fraction of failed requests; TODO confirm the scale).
    pub error_rate: f32,
}
/// Features that can be queried with `VoiceInfo::supports_feature`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VoiceFeature {
    /// The voice's characteristics declare emotion support.
    EmotionSupport,
    /// The voice's device requirements declare GPU support.
    GpuAcceleration,
    /// The voice's minimum memory requirement is at most 512 MB.
    LowMemory,
    /// The voice's quality level is `High` or `Ultra`.
    HighQuality,
}
/// Compact, owned summary of a voice, as produced by `VoiceInfo::summary`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoiceSummary {
    /// Voice identifier.
    pub id: String,
    /// Voice display name.
    pub name: String,
    /// Language of the voice.
    pub language: LanguageCode,
    /// Gender of the voice, if the configuration specifies one.
    pub gender: Option<Gender>,
    /// Speaking style of the voice.
    pub style: SpeakingStyle,
    /// Quality level of the voice.
    pub quality: QualityLevel,
    /// Whether the voice supports emotions.
    pub emotion_support: bool,
    /// Minimum memory requirement in megabytes.
    pub memory_requirement: u32,
    /// Whether the voice supports GPU execution.
    pub gpu_support: bool,
}
impl fmt::Display for VoiceSummary {
    /// Formats the summary as `name (id) - language gender quality [memoryMB, Emotion]`.
    ///
    /// A missing gender is shown as `Unknown`; the `, Emotion` suffix appears only for voices
    /// with emotion support. Language, gender and quality use their `Debug` names.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let gender = match self.gender {
            Some(gender) => format!("{gender:?}"),
            None => "Unknown".to_string(),
        };
        let emotion_suffix = if self.emotion_support {
            ", Emotion"
        } else {
            ""
        };
        // `gender` is already a rendered string, so it is written with `{}`; formatting it
        // with `{:?}` a second time would wrap it in quotation marks.
        write!(
            f,
            "{} ({}) - {:?} {} {:?} [{}MB{}]",
            self.name,
            self.id,
            self.language,
            gender,
            self.quality,
            self.memory_requirement,
            emotion_suffix
        )
    }
}
/// Stateless helper for comparing voices and picking the best match for given criteria.
pub struct VoiceComparator;

impl VoiceComparator {
    /// Compares two voices field by field.
    ///
    /// Every `*_diff` value is `voice1 - voice2`. `memory_diff` is computed without wrapping
    /// and clamped to the `i32` range, so its sign is always correct.
    pub fn compare(voice1: &VoiceInfo, voice2: &VoiceInfo) -> VoiceComparison {
        let memory1 = voice1.config.model_config.device_requirements.min_memory_mb;
        let memory2 = voice2.config.model_config.device_requirements.min_memory_mb;
        VoiceComparison {
            voice1_id: voice1.id().to_string(),
            voice2_id: voice2.id().to_string(),
            quality_diff: voice1.metrics.quality_score - voice2.metrics.quality_score,
            performance_diff: voice1.metrics.performance_score - voice2.metrics.performance_score,
            memory_diff: Self::memory_difference(memory1, memory2),
            features_diff: Self::compare_features(voice1, voice2),
        }
    }

    /// Returns the highest-scoring voice among those that satisfy `criteria`.
    ///
    /// Returns `None` when `voices` is empty or no voice passes the filters. When several
    /// voices share the top score, the last of them in `voices` is returned. A voice whose
    /// score is NaN ranks below every voice with a real score instead of causing a panic.
    pub fn find_best_voice<'a>(
        voices: &'a [VoiceInfo],
        criteria: &VoiceSelectionCriteria,
    ) -> Option<&'a VoiceInfo> {
        voices
            .iter()
            .filter(|voice| Self::matches_criteria(voice, criteria))
            // Score each candidate once rather than on every comparison.
            .map(|voice| (Self::score_voice(voice, criteria), voice))
            .max_by(|(a, _), (b, _)| Self::compare_scores(*a, *b))
            .map(|(_, voice)| voice)
    }

    /// Total ordering for scores in which NaN sorts below every other value.
    fn compare_scores(a: f32, b: f32) -> std::cmp::Ordering {
        a.partial_cmp(&b)
            // `partial_cmp` is `None` only when at least one side is NaN.
            .unwrap_or_else(|| b.is_nan().cmp(&a.is_nan()))
    }

    /// Computes `memory1 - memory2` in megabytes, clamped to the `i32` range.
    fn memory_difference(memory1: u32, memory2: u32) -> i32 {
        let diff = i64::from(memory1) - i64::from(memory2);
        i32::try_from(diff).unwrap_or(if diff < 0 { i32::MIN } else { i32::MAX })
    }

    /// Records, per feature name, whether each of the two voices has the feature.
    fn compare_features(
        voice1: &VoiceInfo,
        voice2: &VoiceInfo,
    ) -> HashMap<String, FeatureComparison> {
        let mut features = HashMap::new();
        features.insert(
            "emotion_support".to_string(),
            FeatureComparison {
                voice1_has: voice1.config.characteristics.emotion_support,
                voice2_has: voice2.config.characteristics.emotion_support,
            },
        );
        features.insert(
            "gpu_support".to_string(),
            FeatureComparison {
                voice1_has: voice1.config.model_config.device_requirements.gpu_support,
                voice2_has: voice2.config.model_config.device_requirements.gpu_support,
            },
        );
        features
    }

    /// Checks the hard filters in `criteria`; unset (`None`) filters always pass.
    fn matches_criteria(voice: &VoiceInfo, criteria: &VoiceSelectionCriteria) -> bool {
        if let Some(max_memory) = criteria.max_memory_mb {
            if voice.config.model_config.device_requirements.min_memory_mb > max_memory {
                return false;
            }
        }
        if let Some(min_quality) = criteria.min_quality_score {
            if voice.metrics.quality_score < min_quality {
                return false;
            }
        }
        if let Some(require_emotion) = criteria.require_emotion_support {
            // Exact match: `Some(false)` rejects voices that do support emotions.
            if voice.config.characteristics.emotion_support != require_emotion {
                return false;
            }
        }
        true
    }

    /// Scores a voice: its overall rating plus a weighted bonus for each prioritized metric.
    fn score_voice(voice: &VoiceInfo, criteria: &VoiceSelectionCriteria) -> f32 {
        let mut score = voice.metrics.overall_rating;
        if criteria.prioritize_quality {
            score += voice.metrics.quality_score * 0.3;
        }
        if criteria.prioritize_performance {
            score += voice.metrics.performance_score * 0.3;
        }
        if criteria.prioritize_memory_efficiency {
            score += voice.metrics.memory_efficiency * 0.2;
        }
        score
    }
}
/// Result of `VoiceComparator::compare`; every `*_diff` value is `voice1 - voice2`.
#[derive(Debug, Clone)]
pub struct VoiceComparison {
    /// Identifier of the first voice.
    pub voice1_id: String,
    /// Identifier of the second voice.
    pub voice2_id: String,
    /// Difference between the two quality scores.
    pub quality_diff: f32,
    /// Difference between the two performance scores.
    pub performance_diff: f32,
    /// Difference between the two minimum memory requirements, in megabytes.
    pub memory_diff: i32,
    /// Per-feature availability, keyed by feature name (`emotion_support`, `gpu_support`).
    pub features_diff: HashMap<String, FeatureComparison>,
}
/// Availability of a single feature in each of two compared voices.
#[derive(Debug, Clone)]
pub struct FeatureComparison {
    /// Whether the first voice has the feature.
    pub voice1_has: bool,
    /// Whether the second voice has the feature.
    pub voice2_has: bool,
}
/// Filters and scoring preferences used by `VoiceComparator::find_best_voice`.
///
/// The `Option` fields are hard filters (`None` disables the filter); the `prioritize_*` flags
/// add a weighted bonus to a voice's score.
#[derive(Debug, Clone, Default)]
pub struct VoiceSelectionCriteria {
    /// Rejects voices whose minimum memory requirement exceeds this many megabytes.
    pub max_memory_mb: Option<u32>,
    /// Rejects voices whose quality score is below this value.
    pub min_quality_score: Option<f32>,
    /// Requires the voice's emotion support to equal this value exactly, so `Some(false)`
    /// rejects voices that support emotions.
    pub require_emotion_support: Option<bool>,
    /// Adds 0.3 times the quality score to a voice's score.
    pub prioritize_quality: bool,
    /// Adds 0.3 times the performance score to a voice's score.
    pub prioritize_performance: bool,
    /// Adds 0.2 times the memory-efficiency score to a voice's score.
    pub prioritize_memory_efficiency: bool,
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::*;

    /// Fixture: a female, neutral, high-quality en-US voice with emotion support that needs
    /// 1024 MB of memory and supports GPU execution.
    fn create_test_voice() -> VoiceConfig {
        let characteristics = VoiceCharacteristics {
            gender: Some(Gender::Female),
            age: Some(AgeRange::Adult),
            style: SpeakingStyle::Neutral,
            emotion_support: true,
            quality: QualityLevel::High,
        };
        let device_requirements = DeviceRequirements {
            min_memory_mb: 1024,
            gpu_support: true,
            compute_capabilities: vec!["cpu".to_string(), "cuda".to_string()],
        };
        let model_config = ModelConfig {
            g2p_model: Some("g2p.bin".to_string()),
            acoustic_model: "acoustic.bin".to_string(),
            vocoder_model: "vocoder.bin".to_string(),
            format: ModelFormat::Candle,
            device_requirements,
        };
        VoiceConfig {
            id: "test-voice".to_string(),
            name: "Test Voice".to_string(),
            language: LanguageCode::EnUs,
            characteristics,
            model_config,
            metadata: HashMap::new(),
        }
    }

    /// Accessors and feature queries reflect the configuration.
    #[test]
    fn test_voice_info_creation() {
        let info = VoiceInfo::from_config(create_test_voice());

        assert_eq!(info.id(), "test-voice");
        assert_eq!(info.language(), LanguageCode::EnUs);
        assert!(info.supports_feature(VoiceFeature::EmotionSupport));
        assert!(info.supports_feature(VoiceFeature::GpuAcceleration));
        assert!(info.supports_feature(VoiceFeature::HighQuality));
    }

    /// Scores are positive and the fixture is classified as high complexity.
    #[test]
    fn test_voice_metrics() {
        let config = create_test_voice();
        let metrics = VoiceMetrics::from_voice(&config);

        assert!(metrics.quality_score > 0.0);
        assert!(metrics.naturalness_score > 0.0);
        assert!(metrics.overall_rating > 0.0);
        assert_eq!(metrics.complexity, VoiceComplexity::High);
    }

    /// The summary copies the identifying fields from the configuration.
    #[test]
    fn test_voice_summary() {
        let summary = VoiceInfo::from_config(create_test_voice()).summary();

        assert_eq!(summary.id, "test-voice");
        assert_eq!(summary.name, "Test Voice");
        assert_eq!(summary.language, LanguageCode::EnUs);
        assert_eq!(summary.gender, Some(Gender::Female));
        assert!(summary.emotion_support);
    }

    /// A higher-quality, larger voice compares as positive in both quality and memory.
    #[test]
    fn test_voice_comparison() {
        let mut lighter_config = create_test_voice();
        lighter_config.id = "test-voice-2".to_string();
        lighter_config.characteristics.quality = QualityLevel::Medium;
        lighter_config.model_config.device_requirements.min_memory_mb = 512;

        let voice1 = VoiceInfo::from_config(create_test_voice());
        let voice2 = VoiceInfo::from_config(lighter_config);
        let comparison = VoiceComparator::compare(&voice1, &voice2);

        assert!(comparison.quality_diff > 0.0);
        assert!(comparison.memory_diff > 0);
    }

    /// Requiring emotion support filters out the voice that lacks it.
    #[test]
    fn test_voice_selection() {
        let mut flat_config = create_test_voice();
        flat_config.id = "test-voice-2".to_string();
        flat_config.characteristics.emotion_support = false;

        let voices = vec![
            VoiceInfo::from_config(create_test_voice()),
            VoiceInfo::from_config(flat_config),
        ];
        let criteria = VoiceSelectionCriteria {
            require_emotion_support: Some(true),
            ..Default::default()
        };

        let best = VoiceComparator::find_best_voice(&voices, &criteria);
        assert!(best.is_some());
        assert_eq!(best.unwrap().id(), "test-voice");
    }

    /// A `VoiceInfo` survives a JSON round trip.
    #[test]
    fn test_json_serialization() {
        let original = VoiceInfo::from_config(create_test_voice());

        let json = original.to_json().unwrap();
        assert!(json.contains("test-voice"));

        let restored = VoiceInfo::from_json(&json).unwrap();
        assert_eq!(restored.id(), original.id());
    }

    /// Model types are inferred from the configured file names.
    #[test]
    fn test_model_info() {
        let config = create_test_voice();
        let model_info = ModelInfo::from_config(&config.model_config);

        assert!(model_info.g2p_model.is_some());
        assert_eq!(model_info.acoustic_model.model_type, "Acoustic");
        assert_eq!(model_info.vocoder_model.model_type, "Vocoder");
        assert!(model_info.total_size_mb > 0);
    }
}