use crate::GlobalOptions;
use std::collections::HashMap;
use std::time::{Duration, Instant};
use voirs_g2p::{
accuracy::{AccuracyBenchmark, TestCase},
LanguageCode,
};
use voirs_sdk::config::AppConfig;
use voirs_sdk::types::SynthesisConfig;
use voirs_sdk::VoirsPipeline;
use voirs_sdk::{QualityLevel, Result};
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
pub model_id: String,
pub avg_synthesis_time: Duration,
pub avg_audio_duration: Duration,
pub real_time_factor: f64,
pub memory_usage_mb: f64,
pub quality_score: f64,
pub success_rate: f64,
pub phoneme_accuracy: Option<f64>,
pub word_accuracy: Option<f64>,
pub accuracy_target_met: Option<bool>,
pub english_accuracy_target_met: Option<bool>,
pub japanese_accuracy_target_met: Option<bool>,
pub latency_target_met: bool,
pub memory_target_met: bool,
}
pub async fn run_benchmark_models(
model_ids: &[String],
iterations: u32,
include_accuracy: bool,
config: &AppConfig,
global: &GlobalOptions,
) -> Result<()> {
if !global.quiet {
println!("Benchmarking TTS Models");
println!("=======================");
println!("Iterations: {}", iterations);
println!("Models: {}", model_ids.len());
if include_accuracy {
println!("Accuracy Testing: Enabled (CMU Test Set)");
}
println!();
}
let test_sentences = get_test_sentences();
let mut results = Vec::new();
let accuracy_benchmark = if include_accuracy {
if !global.quiet {
println!("Loading CMU accuracy test data...");
}
Some(load_cmu_accuracy_benchmark()?)
} else {
None
};
for model_id in model_ids {
if !global.quiet {
println!("Benchmarking model: {}", model_id);
}
let result = benchmark_model(
model_id,
&test_sentences,
iterations,
accuracy_benchmark.as_ref(),
config,
global,
)
.await?;
results.push(result);
if !global.quiet {
println!(" ✓ Completed\n");
}
}
display_benchmark_results(&results, global);
if results.len() > 1 {
generate_comparison_report(&results, global);
}
Ok(())
}
async fn benchmark_model(
model_id: &str,
test_sentences: &[String],
iterations: u32,
accuracy_benchmark: Option<&AccuracyBenchmark>,
config: &AppConfig,
global: &GlobalOptions,
) -> Result<BenchmarkResult> {
if !global.quiet {
println!(" Loading model: {}", model_id);
}
let pipeline = load_model_pipeline(model_id, config, global).await?;
let synth_config = SynthesisConfig {
quality: QualityLevel::High,
..Default::default()
};
let mut total_synthesis_time = Duration::from_secs(0);
let mut total_audio_duration = 0.0;
let mut successful_runs = 0;
let mut memory_samples = Vec::new();
for i in 0..iterations {
if !global.quiet && iterations > 1 {
print!(" Progress: {}/{}\r", i + 1, iterations);
}
for sentence in test_sentences {
let start_time = Instant::now();
let memory_before = get_memory_usage();
match pipeline
.synthesize_with_config(sentence, &synth_config)
.await
{
Ok(audio) => {
let synthesis_time = start_time.elapsed();
total_synthesis_time += synthesis_time;
total_audio_duration += audio.duration();
successful_runs += 1;
let memory_after = get_memory_usage();
memory_samples.push(memory_after - memory_before);
}
Err(e) => {
tracing::warn!("Synthesis failed for '{}': {}", sentence, e);
}
}
}
}
let total_runs = iterations as usize * test_sentences.len();
let avg_synthesis_time = total_synthesis_time / total_runs as u32;
let avg_audio_duration =
Duration::from_secs_f64(total_audio_duration as f64 / successful_runs as f64);
let real_time_factor = avg_synthesis_time.as_secs_f64() / avg_audio_duration.as_secs_f64();
let success_rate = successful_runs as f64 / total_runs as f64;
let avg_memory_usage = memory_samples.iter().sum::<f64>() / memory_samples.len() as f64;
let quality_score = calculate_quality_score(model_id, &real_time_factor, &success_rate);
let latency_target_met = check_latency_target(&avg_synthesis_time);
let memory_target_met = check_memory_target(avg_memory_usage);
if !global.quiet {
println!(" Performance Targets:");
println!(
" Latency (<1ms): {} - {:.2}ms",
if latency_target_met {
"✅ PASSED"
} else {
"❌ FAILED"
},
avg_synthesis_time.as_millis()
);
println!(
" Memory (<100MB): {} - {:.1}MB",
if memory_target_met {
"✅ PASSED"
} else {
"❌ FAILED"
},
avg_memory_usage
);
}
let (
phoneme_accuracy,
word_accuracy,
accuracy_target_met,
english_accuracy_target_met,
japanese_accuracy_target_met,
) = if let Some(benchmark) = accuracy_benchmark {
if !global.quiet {
println!(" Running accuracy tests...");
}
match run_accuracy_test(&pipeline, benchmark, global).await {
Ok((phoneme_acc, word_acc, target_met, en_target_met, ja_target_met)) => {
if !global.quiet {
println!(" Phoneme Accuracy: {:.2}%", phoneme_acc * 100.0);
println!(" Word Accuracy: {:.2}%", word_acc * 100.0);
println!(
" Overall Target Met: {}",
if target_met { "✅" } else { "❌" }
);
println!(
" English Target (>95%): {}",
if en_target_met { "✅" } else { "❌" }
);
println!(
" Japanese Target (>90%): {}",
if ja_target_met { "✅" } else { "❌" }
);
}
(
Some(phoneme_acc),
Some(word_acc),
Some(target_met),
Some(en_target_met),
Some(ja_target_met),
)
}
Err(e) => {
if !global.quiet {
println!(" Accuracy test failed: {}", e);
}
(None, None, None, None, None)
}
}
} else {
(None, None, None, None, None)
};
Ok(BenchmarkResult {
model_id: model_id.to_string(),
avg_synthesis_time,
avg_audio_duration,
real_time_factor,
memory_usage_mb: avg_memory_usage,
quality_score,
success_rate,
phoneme_accuracy,
word_accuracy,
accuracy_target_met,
english_accuracy_target_met,
japanese_accuracy_target_met,
latency_target_met,
memory_target_met,
})
}
fn get_test_sentences() -> Vec<String> {
vec![
"The quick brown fox jumps over the lazy dog.".to_string(),
"Hello, this is a test of the text-to-speech system.".to_string(),
"Artificial intelligence is transforming the way we communicate.".to_string(),
"The weather today is absolutely beautiful with clear skies.".to_string(),
"Machine learning models require careful tuning and validation.".to_string(),
]
}
fn get_memory_usage() -> f64 {
if let Ok(status) = std::fs::read_to_string("/proc/self/status") {
for line in status.lines() {
if line.starts_with("VmRSS:") {
if let Some(kb_str) = line.split_whitespace().nth(1) {
if let Ok(kb) = kb_str.parse::<f64>() {
return kb / 1024.0; }
}
}
}
}
#[cfg(unix)]
{
unsafe {
let mut usage = std::mem::MaybeUninit::<libc::rusage>::uninit();
if libc::getrusage(libc::RUSAGE_SELF, usage.as_mut_ptr()) == 0 {
let usage = usage.assume_init();
#[cfg(target_os = "linux")]
return usage.ru_maxrss as f64 / 1024.0;
#[cfg(target_os = "macos")]
return usage.ru_maxrss as f64 / (1024.0 * 1024.0); }
}
}
if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
let mut total_kb = None;
let mut available_kb = None;
for line in meminfo.lines() {
if line.starts_with("MemTotal:") {
if let Some(kb_str) = line.split_whitespace().nth(1) {
if let Ok(kb) = kb_str.parse::<f64>() {
total_kb = Some(kb);
}
}
} else if line.starts_with("MemAvailable:") {
if let Some(kb_str) = line.split_whitespace().nth(1) {
if let Ok(kb) = kb_str.parse::<f64>() {
available_kb = Some(kb);
}
}
}
}
if let (Some(total), Some(available)) = (total_kb, available_kb) {
let used_mb = (total - available) / 1024.0; return used_mb;
}
}
50.0 }
fn calculate_quality_score(model_id: &str, real_time_factor: &f64, success_rate: &f64) -> f64 {
let performance_score = if *real_time_factor < 0.05 {
5.0
} else if *real_time_factor < 0.1 {
4.5 + 0.5 * (0.1 - real_time_factor) / 0.05 } else if *real_time_factor < 0.25 {
3.5 + 1.0 * (0.25 - real_time_factor) / 0.15 } else if *real_time_factor < 0.5 {
2.5 + 1.0 * (0.5 - real_time_factor) / 0.25 } else if *real_time_factor < 1.0 {
1.5 + 1.0 * (1.0 - real_time_factor) / 0.5 } else if *real_time_factor < 2.0 {
0.5 + 1.0 * (2.0 - real_time_factor) / 1.0 } else {
(5.0 / real_time_factor).min(0.5) };
let reliability_score = if *success_rate >= 0.99 {
5.0
} else if *success_rate >= 0.95 {
4.0 + 1.0 * (success_rate - 0.95) / 0.04 } else if *success_rate >= 0.90 {
3.0 + 1.0 * (success_rate - 0.90) / 0.05 } else if *success_rate >= 0.75 {
1.5 + 1.5 * (success_rate - 0.75) / 0.15 } else {
success_rate * 2.0 };
let (model_quality_baseline, model_speed_expectation) = if model_id.contains("hifigan") {
(0.6, 0.15) } else if model_id.contains("waveglow") || model_id.contains("wavernn") {
(0.8, 0.5) } else if model_id.contains("melgan") || model_id.contains("parallel-wavegan") {
(0.5, 0.1) } else if model_id.contains("tacotron") {
(0.5, 0.3) } else if model_id.contains("fastspeech") {
(0.6, 0.2) } else if model_id.contains("vits") {
(0.7, 0.25) } else if model_id.contains("diffwave") || model_id.contains("diffusion") {
(0.9, 1.0) } else {
(0.3, 0.5) };
let speed_bonus = if *real_time_factor <= model_speed_expectation {
model_quality_baseline
} else if *real_time_factor <= model_speed_expectation * 2.0 {
model_quality_baseline
* (1.0 - (real_time_factor - model_speed_expectation) / model_speed_expectation)
} else {
0.0 };
let total_score = performance_score * 0.4 + reliability_score * 0.4 + speed_bonus * 0.2;
total_score.clamp(0.0, 5.0)
}
fn display_benchmark_results(results: &[BenchmarkResult], global: &GlobalOptions) {
if global.quiet {
return;
}
println!("Benchmark Results:");
println!("==================");
for result in results {
println!("\nModel: {}", result.model_id);
println!(
" Avg Synthesis Time: {:.2}ms",
result.avg_synthesis_time.as_millis()
);
println!(
" Avg Audio Duration: {:.2}ms",
result.avg_audio_duration.as_millis()
);
println!(" Real-time Factor: {:.2}x", result.real_time_factor);
println!(" Memory Usage: {:.1} MB", result.memory_usage_mb);
println!(" Quality Score: {:.1}/5.0", result.quality_score);
println!(" Success Rate: {:.1}%", result.success_rate * 100.0);
if let Some(phoneme_acc) = result.phoneme_accuracy {
println!(" Phoneme Accuracy: {:.2}%", phoneme_acc * 100.0);
}
if let Some(word_acc) = result.word_accuracy {
println!(" Word Accuracy: {:.2}%", word_acc * 100.0);
}
if let Some(target_met) = result.accuracy_target_met {
println!(" Accuracy Targets:");
if let Some(en_target) = result.english_accuracy_target_met {
println!(
" English (>95%): {}",
if en_target {
"✅ PASSED"
} else {
"❌ FAILED"
}
);
}
if let Some(ja_target) = result.japanese_accuracy_target_met {
println!(
" Japanese (>90%): {}",
if ja_target {
"✅ PASSED"
} else {
"❌ FAILED"
}
);
}
println!(
" Overall: {}",
if target_met {
"✅ PASSED"
} else {
"❌ FAILED"
}
);
}
println!(
" Latency Target (<1ms): {}",
if result.latency_target_met {
"✅ PASSED"
} else {
"❌ FAILED"
}
);
println!(
" Memory Target (<100MB): {}",
if result.memory_target_met {
"✅ PASSED"
} else {
"❌ FAILED"
}
);
}
}
fn generate_comparison_report(results: &[BenchmarkResult], global: &GlobalOptions) {
if global.quiet {
return;
}
println!("\n\nComparison Report:");
println!("==================");
let fastest = results.iter().min_by(|a, b| {
a.real_time_factor
.partial_cmp(&b.real_time_factor)
.unwrap_or(std::cmp::Ordering::Equal)
});
let most_reliable = results.iter().max_by(|a, b| {
a.success_rate
.partial_cmp(&b.success_rate)
.unwrap_or(std::cmp::Ordering::Equal)
});
let highest_quality = results.iter().max_by(|a, b| {
a.quality_score
.partial_cmp(&b.quality_score)
.unwrap_or(std::cmp::Ordering::Equal)
});
let most_efficient = results.iter().min_by(|a, b| {
a.memory_usage_mb
.partial_cmp(&b.memory_usage_mb)
.unwrap_or(std::cmp::Ordering::Equal)
});
if let Some(model) = fastest {
println!(
"🏃 Fastest Model: {} ({:.2}x real-time)",
model.model_id, model.real_time_factor
);
}
if let Some(model) = most_reliable {
println!(
"🎯 Most Reliable: {} ({:.1}% success rate)",
model.model_id,
model.success_rate * 100.0
);
}
if let Some(model) = highest_quality {
println!(
"⭐ Highest Quality: {} ({:.1}/5.0)",
model.model_id, model.quality_score
);
}
if let Some(model) = most_efficient {
println!(
"💾 Most Memory Efficient: {} ({:.1} MB)",
model.model_id, model.memory_usage_mb
);
}
println!("\n📊 Performance Target Analysis:");
let models_meeting_latency = results.iter().filter(|r| r.latency_target_met).count();
let models_meeting_memory = results.iter().filter(|r| r.memory_target_met).count();
let models_meeting_all_targets = results
.iter()
.filter(|r| {
r.latency_target_met && r.memory_target_met && r.accuracy_target_met.unwrap_or(false)
})
.count();
println!(
" 🚀 Models meeting latency target (<1ms): {}/{}",
models_meeting_latency,
results.len()
);
println!(
" 🧠 Models meeting memory target (<100MB): {}/{}",
models_meeting_memory,
results.len()
);
if results.iter().any(|r| r.accuracy_target_met.is_some()) {
println!(
" 🎯 Models meeting all targets: {}/{}",
models_meeting_all_targets,
results.len()
);
if models_meeting_all_targets > 0 {
println!(" ✅ Production-ready models found!");
} else {
println!(" ⚠️ No models currently meet all production targets");
}
}
}
async fn load_model_pipeline(
model_id: &str,
config: &AppConfig,
global: &GlobalOptions,
) -> Result<VoirsPipeline> {
let cache_dir = config.pipeline.effective_cache_dir();
let model_path = cache_dir.join("models").join(model_id);
if !model_path.exists() {
return Err(voirs_sdk::VoirsError::config_error(format!(
"Model '{}' not found in cache. Please download it first using 'voirs download-model {}'",
model_id, model_id
)));
}
let model_config_path = model_path.join("config.json");
let model_config = if model_config_path.exists() {
let config_content = std::fs::read_to_string(&model_config_path).map_err(|e| {
voirs_sdk::VoirsError::IoError {
path: model_config_path.clone(),
operation: voirs_sdk::error::IoOperation::Read,
source: e,
}
})?;
serde_json::from_str::<ModelMetadata>(&config_content).map_err(|e| {
voirs_sdk::VoirsError::config_error(format!(
"Invalid model config for '{}': {}",
model_id, e
))
})?
} else {
ModelMetadata {
id: model_id.to_string(),
name: model_id.to_string(),
description: format!("Model {}", model_id),
model_type: "neural".to_string(),
quality: QualityLevel::High,
requires_gpu: false,
memory_requirements_mb: 512,
acoustic_model: "model.safetensors".to_string(),
vocoder_model: "vocoder.safetensors".to_string(),
g2p_model: None,
}
};
if !global.quiet {
println!(
" Model: {} ({})",
model_config.name, model_config.description
);
println!(
" Type: {}, Quality: {:?}",
model_config.model_type, model_config.quality
);
println!(
" Memory required: {} MB",
model_config.memory_requirements_mb
);
}
let available_memory = get_memory_usage(); if model_config.memory_requirements_mb as f64 > available_memory {
tracing::warn!(
"Model '{}' requires {} MB but only {:.1} MB may be available",
model_id,
model_config.memory_requirements_mb,
available_memory
);
}
let acoustic_path = model_path.join(&model_config.acoustic_model);
let vocoder_path = model_path.join(&model_config.vocoder_model);
if !acoustic_path.exists() {
return Err(voirs_sdk::VoirsError::config_error(format!(
"Acoustic model file not found: {}",
acoustic_path.display()
)));
}
if !vocoder_path.exists() {
return Err(voirs_sdk::VoirsError::config_error(format!(
"Vocoder model file not found: {}",
vocoder_path.display()
)));
}
let mut builder = VoirsPipeline::builder().with_quality(model_config.quality);
if model_config.requires_gpu && (config.pipeline.use_gpu || global.gpu) {
builder = builder.with_gpu_acceleration(true);
if !global.quiet {
println!(" GPU acceleration: enabled (required by model)");
}
} else if config.pipeline.use_gpu || global.gpu {
builder = builder.with_gpu_acceleration(true);
if !global.quiet {
println!(" GPU acceleration: enabled");
}
} else if !global.quiet {
println!(" GPU acceleration: disabled");
}
if let Some(threads) = config.pipeline.num_threads {
builder = builder.with_threads(threads);
if !global.quiet {
println!(" Threads: {}", threads);
}
} else {
let default_threads = config.pipeline.effective_thread_count();
builder = builder.with_threads(default_threads);
if !global.quiet {
println!(" Threads: {} (auto)", default_threads);
}
}
let pipeline = builder.build().await.map_err(|e| {
voirs_sdk::VoirsError::config_error(format!("Failed to load model '{}': {}", model_id, e))
})?;
if !global.quiet {
println!(" ✓ Model loaded successfully");
}
Ok(pipeline)
}
fn load_cmu_accuracy_benchmark() -> Result<AccuracyBenchmark> {
let mut benchmark = AccuracyBenchmark::new();
let cmu_test_cases = vec![
("hello", vec!["h", "ə", "ˈl", "oʊ"], LanguageCode::EnUs),
("world", vec!["w", "ɜːr", "l", "d"], LanguageCode::EnUs),
("cat", vec!["k", "æ", "t"], LanguageCode::EnUs),
("dog", vec!["d", "ɔː", "ɡ"], LanguageCode::EnUs),
("house", vec!["h", "aʊ", "s"], LanguageCode::EnUs),
("tree", vec!["t", "r", "iː"], LanguageCode::EnUs),
("water", vec!["ˈw", "ɔː", "t", "ər"], LanguageCode::EnUs),
("phone", vec!["f", "oʊ", "n"], LanguageCode::EnUs),
("beat", vec!["b", "iː", "t"], LanguageCode::EnUs),
("bit", vec!["b", "ɪ", "t"], LanguageCode::EnUs),
("bet", vec!["b", "ɛ", "t"], LanguageCode::EnUs),
("bat", vec!["b", "æ", "t"], LanguageCode::EnUs),
("bot", vec!["b", "ɑː", "t"], LanguageCode::EnUs),
("boat", vec!["b", "oʊ", "t"], LanguageCode::EnUs),
("boot", vec!["b", "uː", "t"], LanguageCode::EnUs),
("but", vec!["b", "ʌ", "t"], LanguageCode::EnUs),
("street", vec!["s", "t", "r", "iː", "t"], LanguageCode::EnUs),
("spring", vec!["s", "p", "r", "ɪ", "ŋ"], LanguageCode::EnUs),
("school", vec!["s", "k", "uː", "l"], LanguageCode::EnUs),
("throw", vec!["θ", "r", "oʊ"], LanguageCode::EnUs),
("three", vec!["θ", "r", "iː"], LanguageCode::EnUs),
("one", vec!["w", "ʌ", "n"], LanguageCode::EnUs),
("two", vec!["t", "uː"], LanguageCode::EnUs),
("eight", vec!["eɪ", "t"], LanguageCode::EnUs),
("through", vec!["θ", "r", "uː"], LanguageCode::EnUs),
("though", vec!["ð", "oʊ"], LanguageCode::EnUs),
("rough", vec!["r", "ʌ", "f"], LanguageCode::EnUs),
(
"computer",
vec!["k", "ə", "m", "ˈp", "j", "uː", "t", "ər"],
LanguageCode::EnUs,
),
(
"beautiful",
vec!["ˈb", "j", "uː", "t", "ɪ", "f", "ə", "l"],
LanguageCode::EnUs,
),
(
"restaurant",
vec!["ˈr", "ɛ", "s", "t", "ər", "ɑː", "n", "t"],
LanguageCode::EnUs,
),
(
"university",
vec!["j", "uː", "n", "ɪ", "ˈv", "ɜːr", "s", "ə", "t", "i"],
LanguageCode::EnUs,
),
(
"pronunciation",
vec!["p", "r", "ə", "n", "ʌ", "n", "s", "i", "ˈeɪ", "ʃ", "ə", "n"],
LanguageCode::EnUs,
),
(
"california",
vec!["k", "æ", "l", "ɪ", "ˈf", "ɔːr", "n", "j", "ə"],
LanguageCode::EnUs,
),
(
"washington",
vec!["ˈw", "ɑː", "ʃ", "ɪ", "ŋ", "t", "ə", "n"],
LanguageCode::EnUs,
),
(
"america",
vec!["ə", "ˈm", "ɛr", "ɪ", "k", "ə"],
LanguageCode::EnUs,
),
(
"technology",
vec!["t", "ɛ", "k", "ˈn", "ɑː", "l", "ə", "dʒ", "i"],
LanguageCode::EnUs,
),
(
"artificial",
vec!["ɑːr", "t", "ɪ", "ˈf", "ɪ", "ʃ", "ə", "l"],
LanguageCode::EnUs,
),
(
"intelligence",
vec!["ɪ", "n", "ˈt", "ɛ", "l", "ɪ", "dʒ", "ə", "n", "s"],
LanguageCode::EnUs,
),
(
"synthesis",
vec!["ˈs", "ɪ", "n", "θ", "ə", "s", "ɪ", "s"],
LanguageCode::EnUs,
),
(
"こんにちは",
vec!["k", "o", "n", "n", "i", "ch", "i", "w", "a"],
LanguageCode::Ja,
),
(
"ありがとう",
vec!["a", "r", "i", "g", "a", "t", "o", "u"],
LanguageCode::Ja,
),
(
"おはよう",
vec!["o", "h", "a", "y", "o", "u"],
LanguageCode::Ja,
),
(
"さよなら",
vec!["s", "a", "y", "o", "n", "a", "r", "a"],
LanguageCode::Ja,
),
(
"コンピュータ",
vec!["k", "o", "n", "p", "y", "u", "u", "t", "a"],
LanguageCode::Ja,
),
(
"テクノロジー",
vec!["t", "e", "k", "u", "n", "o", "r", "o", "j", "i", "i"],
LanguageCode::Ja,
),
(
"アニメーション",
vec!["a", "n", "i", "m", "e", "e", "sh", "o", "n"],
LanguageCode::Ja,
),
(
"大学",
vec!["d", "a", "i", "g", "a", "k", "u"],
LanguageCode::Ja,
),
(
"東京",
vec!["t", "o", "u", "k", "y", "o", "u"],
LanguageCode::Ja,
),
(
"日本語",
vec!["n", "i", "h", "o", "n", "g", "o"],
LanguageCode::Ja,
),
];
for (word, phonemes, lang) in cmu_test_cases {
benchmark.add_test_case(TestCase {
word: word.to_string(),
expected_phonemes: phonemes.into_iter().map(|p| p.to_string()).collect(),
language: lang,
});
}
Ok(benchmark)
}
async fn run_accuracy_test(
_pipeline: &VoirsPipeline,
benchmark: &AccuracyBenchmark,
_global: &GlobalOptions,
) -> Result<(f64, f64, bool, bool, bool)> {
let g2p = create_test_g2p_system();
let metrics = benchmark
.evaluate(&g2p)
.await
.map_err(|e| voirs_sdk::VoirsError::config_error(format!("Accuracy test failed: {}", e)))?;
let english_target_met =
if let Some(en_metrics) = metrics.language_metrics.get(&LanguageCode::EnUs) {
en_metrics.accuracy >= 0.95
} else {
false
};
let japanese_target_met =
if let Some(ja_metrics) = metrics.language_metrics.get(&LanguageCode::Ja) {
ja_metrics.accuracy >= 0.90
} else {
false
};
let target_met = english_target_met || japanese_target_met;
Ok((
metrics.phoneme_accuracy,
metrics.word_accuracy,
target_met,
english_target_met,
japanese_target_met,
))
}
fn create_test_g2p_system() -> impl voirs_g2p::G2p {
voirs_g2p::DummyG2p::new()
}
fn check_latency_target(avg_synthesis_time: &Duration) -> bool {
avg_synthesis_time.as_millis() < 1
}
fn check_memory_target(memory_usage_mb: f64) -> bool {
memory_usage_mb < 100.0
}
#[derive(Debug, serde::Deserialize, serde::Serialize)]
struct ModelMetadata {
pub id: String,
pub name: String,
pub description: String,
pub model_type: String,
pub quality: QualityLevel,
pub requires_gpu: bool,
pub memory_requirements_mb: u32,
pub acoustic_model: String,
pub vocoder_model: String,
pub g2p_model: Option<String>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_test_sentences() {
let sentences = get_test_sentences();
assert!(!sentences.is_empty());
assert!(sentences.len() >= 3);
}
#[test]
fn test_calculate_quality_score() {
let score = calculate_quality_score("hifigan-v1", &0.5, &1.0);
assert!(score >= 0.0 && score <= 5.0);
}
#[test]
fn test_get_memory_usage() {
let usage = get_memory_usage();
assert!(usage >= 0.0);
}
#[test]
fn test_check_latency_target() {
let fast_duration = Duration::from_micros(500);
assert!(check_latency_target(&fast_duration));
let slow_duration = Duration::from_millis(2);
assert!(!check_latency_target(&slow_duration));
let edge_duration = Duration::from_millis(1);
assert!(!check_latency_target(&edge_duration));
}
#[test]
fn test_check_memory_target() {
assert!(check_memory_target(50.0));
assert!(!check_memory_target(150.0));
assert!(!check_memory_target(100.0));
}
}