use crate::{error::CliError, output::OutputFormatter};
use clap::{Args, Subcommand};
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
#[cfg(feature = "emotion")]
use voirs_emotion::{
Emotion, EmotionConfig, EmotionIntensity, EmotionParameters, EmotionPresetLibrary,
EmotionProcessor, EmotionVector,
};
use voirs_sdk::prelude::*;
#[cfg(feature = "emotion")]
#[derive(Debug, Clone, Subcommand)]
pub enum EmotionCommand {
List(ListArgs),
Synth(SynthArgs),
Blend(BlendArgs),
CreatePreset(CreatePresetArgs),
Validate(ValidateArgs),
}
#[derive(Debug, Clone, Args)]
pub struct ListArgs {
#[arg(long, default_value = "table")]
pub format: String,
#[arg(long)]
pub detailed: bool,
}
#[derive(Debug, Clone, Args)]
pub struct SynthArgs {
#[arg(long)]
pub emotion: String,
#[arg(long, default_value = "0.7")]
pub intensity: f32,
pub text: String,
pub output: PathBuf,
#[arg(long)]
pub voice: Option<String>,
#[arg(long, default_value = "22050")]
pub sample_rate: u32,
}
#[derive(Debug, Clone, Args)]
pub struct BlendArgs {
#[arg(long)]
pub emotions: String,
#[arg(long)]
pub weights: String,
pub text: String,
pub output: PathBuf,
#[arg(long)]
pub voice: Option<String>,
#[arg(long, default_value = "22050")]
pub sample_rate: u32,
}
#[derive(Debug, Clone, Args)]
pub struct CreatePresetArgs {
#[arg(long)]
pub name: String,
#[arg(long)]
pub config: PathBuf,
#[arg(long)]
pub force: bool,
}
#[derive(Debug, Clone, Args)]
pub struct ValidateArgs {
#[arg(long)]
pub preset: String,
#[arg(long, default_value = "Hello, this is a test.")]
pub text: String,
#[arg(long, default_value = "table")]
pub format: String,
}
#[cfg(feature = "emotion")]
pub async fn execute_emotion_command(
command: EmotionCommand,
output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
match command {
EmotionCommand::List(args) => execute_list(args, output_formatter).await,
EmotionCommand::Synth(args) => execute_synth(args, output_formatter).await,
EmotionCommand::Blend(args) => execute_blend(args, output_formatter).await,
EmotionCommand::CreatePreset(args) => execute_create_preset(args, output_formatter).await,
EmotionCommand::Validate(args) => execute_validate(args, output_formatter).await,
}
}
#[cfg(feature = "emotion")]
async fn execute_list(
args: ListArgs,
output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
let emotions = get_available_emotions();
match args.format.as_str() {
"table" => {
println!("{:<12} {:<30} Default Intensity", "Emotion", "Description");
println!("{}", "-".repeat(60));
for (name, desc, intensity) in emotions {
println!("{:<12} {:<30} {}", name, desc, intensity);
}
}
"json" => {
let json_emotions: Vec<_> = emotions
.iter()
.map(|(name, desc, intensity)| {
serde_json::json!({
"name": name,
"description": desc,
"default_intensity": intensity,
"parameters": if args.detailed {
Some(get_emotion_parameters(name))
} else {
None
}
})
})
.collect();
println!(
"{}",
serde_json::to_string_pretty(&json_emotions).map_err(CliError::Serialization)?
);
}
_ => {
for (name, desc, intensity) in emotions {
println!("{}: {} (default intensity: {})", name, desc, intensity);
}
}
}
Ok(())
}
#[cfg(feature = "emotion")]
async fn execute_synth(
args: SynthArgs,
output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
if args.intensity < 0.0 || args.intensity > 1.0 {
return Err(CliError::invalid_parameter(
"intensity",
"Emotion intensity must be between 0.0 and 1.0",
));
}
let mut processor = EmotionProcessor::new()
.map_err(|e| CliError::config(format!("Failed to create emotion processor: {}", e)))?;
let emotion = Emotion::from_str(&args.emotion);
processor
.set_emotion(emotion, Some(args.intensity))
.await
.map_err(|e| CliError::config(format!("Failed to set emotion: {}", e)))?;
println!(
"Synthesizing '{}' with emotion '{}' (intensity: {:.2})",
args.text, args.emotion, args.intensity
);
println!("Processing emotion parameters...");
let mut pipeline_builder = VoirsPipelineBuilder::new().with_quality(QualityLevel::High);
if let Some(voice) = &args.voice {
pipeline_builder = pipeline_builder.with_voice(voice);
}
let pipeline = pipeline_builder
.build()
.await
.map_err(|e| CliError::config(format!("Failed to create synthesis pipeline: {}", e)))?;
let mut synthesis_config = SynthesisConfig::default();
let emotion_params = get_emotion_parameters(&args.emotion);
if let Some(pitch_shift) = emotion_params.get("pitch_shift").and_then(|v| v.as_f64()) {
synthesis_config.pitch_shift = pitch_shift as f32;
}
if let Some(tempo_scale) = emotion_params.get("tempo_scale").and_then(|v| v.as_f64()) {
synthesis_config.speaking_rate = tempo_scale as f32;
}
println!("Generating speech with emotional expression...");
let audio = pipeline
.synthesize_with_config(&args.text, &synthesis_config)
.await
.map_err(|e| CliError::config(format!("Synthesis failed: {}", e)))?;
audio.save_wav(&args.output).map_err(|e| {
CliError::Io(std::io::Error::other(format!(
"Failed to save audio: {}",
e
)))
})?;
output_formatter.success(&format!(
"Emotion synthesis completed! Output saved to: {}",
args.output.display()
));
Ok(())
}
#[cfg(feature = "emotion")]
async fn execute_blend(
args: BlendArgs,
output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
let emotion_names: Vec<&str> = args.emotions.split(',').collect();
let weight_strings: Vec<&str> = args.weights.split(',').collect();
if emotion_names.len() != weight_strings.len() {
return Err(CliError::invalid_parameter(
"weights",
"Number of emotions must match number of weights",
));
}
let weights: std::result::Result<Vec<f32>, _> = weight_strings
.iter()
.map(|w| w.trim().parse::<f32>())
.collect();
let weights = weights.map_err(|_| {
CliError::invalid_parameter("weights", "Invalid weight format. Use decimal numbers.")
})?;
let weight_sum: f32 = weights.iter().sum();
if (weight_sum - 1.0).abs() > 0.01 {
return Err(CliError::invalid_parameter(
"weights",
"Emotion weights must sum to 1.0",
));
}
let mut emotion_vector = EmotionVector::new();
for (emotion_name, weight) in emotion_names.iter().zip(weights.iter()) {
let emotion = Emotion::from_str(emotion_name.trim());
emotion_vector.add_emotion(emotion, EmotionIntensity::new(*weight));
}
println!(
"Blending emotions: {} with weights: {:?}",
args.emotions, weights
);
println!("Processing emotion blend...");
let mut pipeline_builder = VoirsPipelineBuilder::new().with_quality(QualityLevel::High);
if let Some(voice) = &args.voice {
pipeline_builder = pipeline_builder.with_voice(voice);
}
let pipeline = pipeline_builder
.build()
.await
.map_err(|e| CliError::config(format!("Failed to create synthesis pipeline: {}", e)))?;
let mut synthesis_config = SynthesisConfig::default();
let mut pitch_shift = 0.0f32;
let mut speaking_rate = 1.0f32;
for (emotion_name, weight) in emotion_names.iter().zip(weights.iter()) {
let emotion_params = get_emotion_parameters(emotion_name.trim());
if let Some(pitch) = emotion_params.get("pitch_shift").and_then(|v| v.as_f64()) {
pitch_shift += (pitch as f32 - 1.0) * weight;
}
if let Some(tempo) = emotion_params.get("tempo_scale").and_then(|v| v.as_f64()) {
speaking_rate += (tempo as f32 - 1.0) * weight;
}
}
synthesis_config.pitch_shift = pitch_shift;
synthesis_config.speaking_rate = speaking_rate;
let audio = pipeline
.synthesize_with_config(&args.text, &synthesis_config)
.await
.map_err(|e| CliError::config(format!("Emotion blend synthesis failed: {}", e)))?;
audio.save_wav(&args.output).map_err(|e| {
CliError::Io(std::io::Error::other(format!(
"Failed to save audio: {}",
e
)))
})?;
output_formatter.success(&format!(
"Emotion blend synthesis completed! Output saved to: {}",
args.output.display()
));
Ok(())
}
#[cfg(feature = "emotion")]
async fn execute_create_preset(
args: CreatePresetArgs,
output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
if preset_exists(&args.name) && !args.force {
return Err(CliError::config(format!(
"Preset '{}' already exists. Use --force to overwrite.",
args.name
)));
}
let config_content = std::fs::read_to_string(&args.config).map_err(CliError::Io)?;
let emotion_config: EmotionConfig = serde_json::from_str(&config_content)
.map_err(|e| CliError::config(format!("Invalid config format: {}", e)))?;
println!("Creating emotion preset '{}'...", args.name);
let preset_dir = get_preset_directory()?;
fs::create_dir_all(&preset_dir).map_err(CliError::Io)?;
let preset_path = preset_dir.join(format!("{}.json", args.name));
let preset_data = serde_json::json!({
"name": args.name,
"version": "1.0",
"created_at": chrono::Utc::now().to_rfc3339(),
"config": emotion_config,
"description": format!("Custom emotion preset: {}", args.name),
"author": "VoiRS CLI",
"tags": ["custom", "user-created"]
});
let preset_json = serde_json::to_string_pretty(&preset_data)
.map_err(|e| CliError::config(format!("Failed to serialize preset: {}", e)))?;
fs::write(&preset_path, preset_json).map_err(CliError::Io)?;
output_formatter.success(&format!(
"Emotion preset '{}' created successfully at: {}",
args.name,
preset_path.display()
));
Ok(())
}
#[cfg(feature = "emotion")]
async fn execute_validate(
args: ValidateArgs,
output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
if !preset_exists(&args.preset) {
return Err(CliError::config(format!(
"Emotion preset '{}' not found",
args.preset
)));
}
println!("Validating emotion preset '{}'...", args.preset);
let mut validation_results = Vec::new();
let emotion_config = match load_emotion_preset(&args.preset) {
Ok(config) => {
validation_results.push(("Preset Loading", "✓ Valid"));
config
}
Err(e) => {
let error_msg = format!("✗ Error: {}", e);
validation_results.push(("Preset Loading", &error_msg));
return Err(CliError::config(format!("Failed to load preset: {}", e)));
}
};
let emotion_params = get_emotion_parameters(&args.preset);
let pitch_valid = emotion_params
.get("pitch_shift")
.and_then(|v| v.as_f64())
.map(|p| p >= 0.5 && p <= 2.0)
.unwrap_or(false);
validation_results.push((
"Pitch Parameters",
if pitch_valid {
"✓ Valid (0.5-2.0)"
} else {
"⚠ Out of range"
},
));
let tempo_valid = emotion_params
.get("tempo_scale")
.and_then(|v| v.as_f64())
.map(|t| t >= 0.5 && t <= 2.0)
.unwrap_or(false);
validation_results.push((
"Tempo Parameters",
if tempo_valid {
"✓ Valid (0.5-2.0)"
} else {
"⚠ Out of range"
},
));
match test_synthesis_with_preset(&args.preset, &args.text).await {
Ok(quality_score) => {
validation_results.push(("Synthesis Test", "✓ Successful"));
let quality_msg = format!("✓ Score: {:.1}/10", quality_score);
validation_results.push(("Audio Quality", "✓ Good"));
let naturalness = calculate_naturalness_score(quality_score);
let naturalness_msg = format!("✓ {:.1}/10", naturalness);
validation_results.push(("Naturalness Score", "✓ Good")); }
Err(e) => {
let test_error = format!("✗ Failed: {}", e);
validation_results.push(("Synthesis Test", "✗ Failed"));
validation_results.push(("Audio Quality", "✗ Cannot assess"));
validation_results.push(("Naturalness Score", "✗ Cannot assess"));
}
}
match args.format.as_str() {
"table" => {
println!("{:<20} Status", "Parameter");
println!("{}", "-".repeat(40));
for (param, status) in validation_results {
println!("{:<20} {}", param, status);
}
}
"json" => {
let json_results: Vec<_> = validation_results
.into_iter()
.map(|(param, status)| {
serde_json::json!({
"parameter": param,
"status": status
})
})
.collect();
println!(
"{}",
serde_json::to_string_pretty(&json_results).map_err(CliError::Serialization)?
);
}
_ => {
for (param, status) in validation_results {
println!("{}: {}", param, status);
}
}
}
output_formatter.success("Emotion preset validation completed!");
Ok(())
}
fn get_available_emotions() -> Vec<(&'static str, &'static str, f32)> {
vec![
("neutral", "Neutral emotional state", 1.0),
("happy", "Joyful and positive emotional state", 0.7),
("sad", "Melancholic and subdued emotional state", 0.6),
("angry", "Intense and aggressive emotional state", 0.8),
("fear", "Anxious and worried emotional state", 0.6),
("surprise", "Shocked and unexpected emotional state", 0.8),
("disgust", "Repulsed and negative emotional state", 0.7),
("calm", "Peaceful and relaxed emotional state", 0.5),
("excited", "Energetic and enthusiastic emotional state", 0.9),
("tender", "Gentle and affectionate emotional state", 0.6),
(
"confident",
"Assured and self-confident emotional state",
0.7,
),
("melancholic", "Thoughtful and wistful emotional state", 0.5),
]
}
fn get_emotion_parameters(emotion: &str) -> serde_json::Value {
match emotion {
"happy" => serde_json::json!({
"pitch_shift": 1.1,
"tempo_scale": 1.05,
"energy_scale": 1.2,
"brightness": 0.15,
"roughness": -0.1
}),
"sad" => serde_json::json!({
"pitch_shift": 0.9,
"tempo_scale": 0.85,
"energy_scale": 0.7,
"brightness": -0.2,
"breathiness": 0.1
}),
_ => serde_json::json!({
"pitch_shift": 1.0,
"tempo_scale": 1.0,
"energy_scale": 1.0
}),
}
}
fn preset_exists(name: &str) -> bool {
if get_available_emotions()
.iter()
.any(|(emotion_name, _, _)| *emotion_name == name)
{
return true;
}
if let Ok(preset_dir) = get_preset_directory() {
let preset_path = preset_dir.join(format!("{}.json", name));
preset_path.exists()
} else {
false
}
}
fn get_preset_directory() -> std::result::Result<PathBuf, CliError> {
let config_dir = dirs::config_dir()
.ok_or_else(|| CliError::config("Unable to determine config directory"))?;
Ok(config_dir.join("voirs").join("emotion_presets"))
}
fn load_emotion_preset(name: &str) -> std::result::Result<EmotionConfig, CliError> {
if let Ok(preset_dir) = get_preset_directory() {
let preset_path = preset_dir.join(format!("{}.json", name));
if preset_path.exists() {
let content = fs::read_to_string(&preset_path).map_err(CliError::Io)?;
let preset_data: serde_json::Value = serde_json::from_str(&content)
.map_err(|e| CliError::config(format!("Invalid preset format: {}", e)))?;
let config = preset_data
.get("config")
.ok_or_else(|| CliError::config("Preset missing config section"))?;
return serde_json::from_value(config.clone())
.map_err(|e| CliError::config(format!("Invalid emotion config: {}", e)));
}
}
if get_available_emotions()
.iter()
.any(|(emotion_name, _, _)| *emotion_name == name)
{
let params = get_emotion_parameters(name);
let mut config = EmotionConfig::default();
return Ok(config);
}
Err(CliError::config(format!(
"Emotion preset '{}' not found",
name
)))
}
#[cfg(feature = "emotion")]
async fn test_synthesis_with_preset(
preset_name: &str,
text: &str,
) -> std::result::Result<f32, CliError> {
let pipeline = VoirsPipelineBuilder::new()
.with_quality(QualityLevel::Medium)
.build()
.await
.map_err(|e| CliError::config(format!("Failed to create test pipeline: {}", e)))?;
let mut synthesis_config = SynthesisConfig::default();
let emotion_params = get_emotion_parameters(preset_name);
if let Some(pitch_shift) = emotion_params.get("pitch_shift").and_then(|v| v.as_f64()) {
synthesis_config.pitch_shift = pitch_shift as f32;
}
if let Some(tempo_scale) = emotion_params.get("tempo_scale").and_then(|v| v.as_f64()) {
synthesis_config.speaking_rate = tempo_scale as f32;
}
let test_text = if text.len() > 50 {
format!("{}...", &text[..47])
} else {
text.to_string()
};
let audio = pipeline
.synthesize_with_config(&test_text, &synthesis_config)
.await
.map_err(|e| CliError::config(format!("Test synthesis failed: {}", e)))?;
let quality_score = calculate_audio_quality_score(&audio);
Ok(quality_score)
}
fn calculate_audio_quality_score(audio: &AudioBuffer) -> f32 {
let samples = audio.samples();
if samples.is_empty() {
return 0.0;
}
let clipping_ratio = samples
.iter()
.filter(|&&sample| sample.abs() > 0.95)
.count() as f32
/ samples.len() as f32;
let silence_ratio = samples
.iter()
.filter(|&&sample| sample.abs() < 0.01)
.count() as f32
/ samples.len() as f32;
let rms = (samples.iter().map(|&s| s * s).sum::<f32>() / samples.len() as f32).sqrt();
let mut score = 10.0;
score -= clipping_ratio * 5.0; score -= if silence_ratio > 0.8 { 4.0 } else { 0.0 }; score -= if rms < 0.1 { 3.0 } else { 0.0 };
score.clamp(0.0, 10.0)
}
fn calculate_naturalness_score(quality_score: f32) -> f32 {
let base_naturalness = quality_score * 0.8 + 1.0; base_naturalness.clamp(0.0, 10.0)
}