1use crate::{error::CliError, output::OutputFormatter};
4use clap::{Args, Subcommand};
5use std::collections::HashMap;
6use std::fs;
7use std::path::PathBuf;
8#[cfg(feature = "emotion")]
9use voirs_emotion::{
10 Emotion, EmotionConfig, EmotionIntensity, EmotionParameters, EmotionPresetLibrary,
11 EmotionProcessor, EmotionVector,
12};
13use voirs_sdk::prelude::*;
14
/// Emotion control subcommands
// Each variant carries the parsed arguments of one subcommand;
// `execute_emotion_command` routes it to the matching handler. The `///`
// comments double as the help text clap shows for each subcommand.
#[cfg(feature = "emotion")]
#[derive(Debug, Clone, Subcommand)]
pub enum EmotionCommand {
    /// List the built-in emotions
    List(ListArgs),
    /// Synthesize text with a single emotion
    Synth(SynthArgs),
    /// Synthesize text with a weighted blend of emotions
    Blend(BlendArgs),
    /// Create a custom emotion preset from a JSON config file
    CreatePreset(CreatePresetArgs),
    /// Validate an emotion preset
    Validate(ValidateArgs),
}
30
31#[derive(Debug, Clone, Args)]
32pub struct ListArgs {
33 #[arg(long, default_value = "table")]
35 pub format: String,
36 #[arg(long)]
38 pub detailed: bool,
39}
40
41#[derive(Debug, Clone, Args)]
42pub struct SynthArgs {
43 #[arg(long)]
45 pub emotion: String,
46 #[arg(long, default_value = "0.7")]
48 pub intensity: f32,
49 pub text: String,
51 pub output: PathBuf,
53 #[arg(long)]
55 pub voice: Option<String>,
56 #[arg(long, default_value = "22050")]
58 pub sample_rate: u32,
59}
60
61#[derive(Debug, Clone, Args)]
62pub struct BlendArgs {
63 #[arg(long)]
65 pub emotions: String,
66 #[arg(long)]
68 pub weights: String,
69 pub text: String,
71 pub output: PathBuf,
73 #[arg(long)]
75 pub voice: Option<String>,
76 #[arg(long, default_value = "22050")]
78 pub sample_rate: u32,
79}
80
81#[derive(Debug, Clone, Args)]
82pub struct CreatePresetArgs {
83 #[arg(long)]
85 pub name: String,
86 #[arg(long)]
88 pub config: PathBuf,
89 #[arg(long)]
91 pub force: bool,
92}
93
94#[derive(Debug, Clone, Args)]
95pub struct ValidateArgs {
96 #[arg(long)]
98 pub preset: String,
99 #[arg(long, default_value = "Hello, this is a test.")]
101 pub text: String,
102 #[arg(long, default_value = "table")]
104 pub format: String,
105}
106
/// Entry point of the emotion command group.
///
/// Unpacks the parsed subcommand and forwards its arguments, together with
/// the shared `output_formatter`, to the handler for that subcommand. The
/// handler's result is returned unchanged.
#[cfg(feature = "emotion")]
pub async fn execute_emotion_command(
    command: EmotionCommand,
    output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
    use EmotionCommand::{Blend, CreatePreset, List, Synth, Validate};

    let formatter = output_formatter;
    match command {
        List(list_args) => execute_list(list_args, formatter).await,
        Synth(synth_args) => execute_synth(synth_args, formatter).await,
        Blend(blend_args) => execute_blend(blend_args, formatter).await,
        CreatePreset(preset_args) => execute_create_preset(preset_args, formatter).await,
        Validate(validate_args) => execute_validate(validate_args, formatter).await,
    }
}
121
/// Prints the catalogue returned by `get_available_emotions`.
///
/// Output depends on `args.format`:
/// * `"table"` - a header row followed by aligned columns;
/// * `"json"` - a pretty-printed array whose entries carry a `parameters`
///   field, filled from `get_emotion_parameters` when `args.detailed` is set
///   and `null` otherwise;
/// * anything else - one `name: description (default intensity: x)` line per
///   emotion.
///
/// `args.detailed` only influences the JSON output, and `output_formatter`
/// is not used by this handler.
///
/// # Errors
/// Returns `CliError::Serialization` if rendering the JSON fails.
#[cfg(feature = "emotion")]
async fn execute_list(
    args: ListArgs,
    output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
    let catalog = get_available_emotions();

    if args.format == "json" {
        let mut entries = Vec::with_capacity(catalog.len());
        for (name, description, default_intensity) in &catalog {
            let parameters = if args.detailed {
                Some(get_emotion_parameters(name))
            } else {
                None
            };
            entries.push(serde_json::json!({
                "name": name,
                "description": description,
                "default_intensity": default_intensity,
                "parameters": parameters
            }));
        }

        let rendered =
            serde_json::to_string_pretty(&entries).map_err(CliError::Serialization)?;
        println!("{}", rendered);
        return Ok(());
    }

    let tabular = args.format == "table";
    if tabular {
        println!("{:<12} {:<30} Default Intensity", "Emotion", "Description");
        println!("{}", "-".repeat(60));
    }
    for (name, description, default_intensity) in catalog {
        if tabular {
            println!("{:<12} {:<30} {}", name, description, default_intensity);
        } else {
            println!(
                "{}: {} (default intensity: {})",
                name, description, default_intensity
            );
        }
    }

    Ok(())
}
168
/// Synthesizes `args.text` with a single emotion and saves it as a WAV file.
///
/// Steps, in order:
/// 1. reject intensities outside `0.0..=1.0` (including NaN);
/// 2. create an `EmotionProcessor` and set the requested emotion on it;
/// 3. build a high-quality pipeline, optionally with `args.voice`;
/// 4. copy `pitch_shift` / `tempo_scale` from `get_emotion_parameters` into a
///    default `SynthesisConfig`;
/// 5. synthesize and write the audio to `args.output`.
///
/// `args.sample_rate` is not read here.
///
/// # Errors
/// * invalid-parameter error when the intensity is out of range or NaN;
/// * `CliError::config` when the processor, pipeline, or synthesis fails;
/// * `CliError::Io` when the WAV file cannot be saved.
#[cfg(feature = "emotion")]
async fn execute_synth(
    args: SynthArgs,
    output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
    // Written as a range test because `contains` is false for NaN, whereas
    // `x < 0.0 || x > 1.0` is also false for NaN and would accept it.
    if !(0.0..=1.0).contains(&args.intensity) {
        return Err(CliError::invalid_parameter(
            "intensity",
            "Emotion intensity must be between 0.0 and 1.0",
        ));
    }

    let mut processor = EmotionProcessor::new()
        .map_err(|e| CliError::config(format!("Failed to create emotion processor: {}", e)))?;

    // NOTE(review): the processor is configured here but is not handed to the
    // pipeline below; only the pitch/tempo parameters reach the synthesis.
    let emotion = Emotion::from_str(&args.emotion);
    processor
        .set_emotion(emotion, Some(args.intensity))
        .await
        .map_err(|e| CliError::config(format!("Failed to set emotion: {}", e)))?;

    println!(
        "Synthesizing '{}' with emotion '{}' (intensity: {:.2})",
        args.text, args.emotion, args.intensity
    );

    println!("Processing emotion parameters...");

    let mut pipeline_builder = VoirsPipelineBuilder::new().with_quality(QualityLevel::High);

    if let Some(voice) = &args.voice {
        pipeline_builder = pipeline_builder.with_voice(voice);
    }

    let pipeline = pipeline_builder
        .build()
        .await
        .map_err(|e| CliError::config(format!("Failed to create synthesis pipeline: {}", e)))?;

    let mut synthesis_config = SynthesisConfig::default();

    // NOTE(review): `execute_blend` applies the same parameters as offsets from
    // 1.0 for pitch (neutral = 0.0), while this assigns the raw factor
    // (neutral = 1.0). Confirm which scale `SynthesisConfig::pitch_shift` expects.
    let emotion_params = get_emotion_parameters(&args.emotion);
    if let Some(pitch_shift) = emotion_params.get("pitch_shift").and_then(|v| v.as_f64()) {
        synthesis_config.pitch_shift = pitch_shift as f32;
    }
    if let Some(tempo_scale) = emotion_params.get("tempo_scale").and_then(|v| v.as_f64()) {
        synthesis_config.speaking_rate = tempo_scale as f32;
    }

    println!("Generating speech with emotional expression...");

    let audio = pipeline
        .synthesize_with_config(&args.text, &synthesis_config)
        .await
        .map_err(|e| CliError::config(format!("Synthesis failed: {}", e)))?;

    audio.save_wav(&args.output).map_err(|e| {
        CliError::Io(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("Failed to save audio: {}", e),
        ))
    })?;

    output_formatter.success(&format!(
        "Emotion synthesis completed! Output saved to: {}",
        args.output.display()
    ));

    Ok(())
}
248
/// Synthesizes `args.text` with a weighted blend of emotions and saves it as
/// a WAV file.
///
/// `args.emotions` and `args.weights` are comma-separated lists of equal
/// length. Each weight must be a finite, non-negative number and the weights
/// must sum to 1.0 within a tolerance of 0.01. The blended pitch and tempo
/// are accumulated as weighted offsets from 1.0 of the per-emotion values
/// returned by `get_emotion_parameters`.
///
/// `args.sample_rate` is not read here.
///
/// # Errors
/// * invalid-parameter error for mismatched list lengths, empty emotion
///   names, unparsable / non-finite / negative weights, or a wrong weight sum;
/// * `CliError::config` when the pipeline cannot be built or synthesis fails;
/// * `CliError::Io` when the WAV file cannot be saved.
#[cfg(feature = "emotion")]
async fn execute_blend(
    args: BlendArgs,
    output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
    let emotion_names: Vec<&str> = args.emotions.split(',').map(str::trim).collect();
    let weight_strings: Vec<&str> = args.weights.split(',').collect();

    if emotion_names.len() != weight_strings.len() {
        return Err(CliError::invalid_parameter(
            "weights",
            "Number of emotions must match number of weights",
        ));
    }

    // Inputs such as "happy,,sad" or a trailing comma produce empty names.
    if emotion_names.iter().any(|name| name.is_empty()) {
        return Err(CliError::invalid_parameter(
            "emotions",
            "Emotion names must not be empty",
        ));
    }

    let weights = weight_strings
        .iter()
        .map(|w| w.trim().parse::<f32>())
        .collect::<std::result::Result<Vec<f32>, _>>()
        .map_err(|_| {
            CliError::invalid_parameter("weights", "Invalid weight format. Use decimal numbers.")
        })?;

    // "NaN" and "inf" parse successfully as f32, and a NaN sum slips past the
    // tolerance check below because every comparison with NaN is false.
    if weights.iter().any(|w| !w.is_finite() || *w < 0.0) {
        return Err(CliError::invalid_parameter(
            "weights",
            "Emotion weights must be finite, non-negative numbers",
        ));
    }

    let weight_sum: f32 = weights.iter().sum();
    if (weight_sum - 1.0).abs() > 0.01 {
        return Err(CliError::invalid_parameter(
            "weights",
            "Emotion weights must sum to 1.0",
        ));
    }

    // NOTE(review): the vector is populated but not handed to the pipeline
    // below; only the blended pitch/tempo values reach the synthesis.
    let mut emotion_vector = EmotionVector::new();
    for (&emotion_name, &weight) in emotion_names.iter().zip(weights.iter()) {
        let emotion = Emotion::from_str(emotion_name);
        emotion_vector.add_emotion(emotion, EmotionIntensity::new(weight));
    }

    println!(
        "Blending emotions: {} with weights: {:?}",
        args.emotions, weights
    );

    println!("Processing emotion blend...");

    let mut pipeline_builder = VoirsPipelineBuilder::new().with_quality(QualityLevel::High);

    if let Some(voice) = &args.voice {
        pipeline_builder = pipeline_builder.with_voice(voice);
    }

    let pipeline = pipeline_builder
        .build()
        .await
        .map_err(|e| CliError::config(format!("Failed to create synthesis pipeline: {}", e)))?;

    let mut synthesis_config = SynthesisConfig::default();

    // Pitch starts at 0.0 and tempo at 1.0; each emotion contributes its
    // deviation from 1.0 scaled by its weight.
    let mut pitch_shift = 0.0f32;
    let mut speaking_rate = 1.0f32;

    for (&emotion_name, &weight) in emotion_names.iter().zip(weights.iter()) {
        let emotion_params = get_emotion_parameters(emotion_name);
        if let Some(pitch) = emotion_params.get("pitch_shift").and_then(|v| v.as_f64()) {
            pitch_shift += (pitch as f32 - 1.0) * weight;
        }
        if let Some(tempo) = emotion_params.get("tempo_scale").and_then(|v| v.as_f64()) {
            speaking_rate += (tempo as f32 - 1.0) * weight;
        }
    }

    synthesis_config.pitch_shift = pitch_shift;
    synthesis_config.speaking_rate = speaking_rate;

    let audio = pipeline
        .synthesize_with_config(&args.text, &synthesis_config)
        .await
        .map_err(|e| CliError::config(format!("Emotion blend synthesis failed: {}", e)))?;

    audio.save_wav(&args.output).map_err(|e| {
        CliError::Io(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("Failed to save audio: {}", e),
        ))
    })?;

    output_formatter.success(&format!(
        "Emotion blend synthesis completed! Output saved to: {}",
        args.output.display()
    ));

    Ok(())
}
352
/// Creates a custom emotion preset file from a JSON configuration.
///
/// The file at `args.config` is parsed as an `EmotionConfig`, wrapped with
/// metadata (name, version, creation time, description, author, tags) and
/// written to `<preset directory>/<name>.json`. The preset directory is
/// created if it does not exist.
///
/// A name that `preset_exists` already knows (built-in emotions included) is
/// rejected unless `args.force` is set.
///
/// # Errors
/// * invalid-parameter error when the name is empty or contains a path
///   separator;
/// * `CliError::config` when the preset exists without `--force`, the config
///   is not valid, the preset directory cannot be determined, or
///   serialization fails;
/// * `CliError::Io` when reading the config or writing the preset fails.
#[cfg(feature = "emotion")]
async fn execute_create_preset(
    args: CreatePresetArgs,
    output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
    // The name becomes part of a file path, so it must be a plain file name;
    // otherwise values like "../x" would be written outside the preset directory.
    if args.name.is_empty() || args.name.contains(|c: char| c == '/' || c == '\\') {
        return Err(CliError::invalid_parameter(
            "name",
            "Preset name must not be empty or contain path separators",
        ));
    }

    if preset_exists(&args.name) && !args.force {
        return Err(CliError::config(format!(
            "Preset '{}' already exists. Use --force to overwrite.",
            args.name
        )));
    }

    let config_content = fs::read_to_string(&args.config).map_err(CliError::Io)?;

    // Parse before writing anything so an invalid config never creates a file.
    let emotion_config: EmotionConfig = serde_json::from_str(&config_content)
        .map_err(|e| CliError::config(format!("Invalid config format: {}", e)))?;

    println!("Creating emotion preset '{}'...", args.name);

    let preset_dir = get_preset_directory()?;
    fs::create_dir_all(&preset_dir).map_err(CliError::Io)?;

    let preset_path = preset_dir.join(format!("{}.json", args.name));

    let preset_data = serde_json::json!({
        "name": args.name,
        "version": "1.0",
        "created_at": chrono::Utc::now().to_rfc3339(),
        "config": emotion_config,
        "description": format!("Custom emotion preset: {}", args.name),
        "author": "VoiRS CLI",
        "tags": ["custom", "user-created"]
    });

    let preset_json = serde_json::to_string_pretty(&preset_data)
        .map_err(|e| CliError::config(format!("Failed to serialize preset: {}", e)))?;

    fs::write(&preset_path, preset_json).map_err(CliError::Io)?;

    output_formatter.success(&format!(
        "Emotion preset '{}' created successfully at: {}",
        args.name,
        preset_path.display()
    ));

    Ok(())
}
407
/// Validates an emotion preset and prints a report.
///
/// Checks performed, in order:
/// 1. the preset exists (`preset_exists`) and loads (`load_emotion_preset`);
/// 2. `pitch_shift` and `tempo_scale` from `get_emotion_parameters` lie in
///    `0.5..=2.0`;
/// 3. a test synthesis of `args.text` succeeds; its quality score and the
///    derived naturalness score are reported.
///
/// A failed test synthesis is reported in the table, with the error text,
/// and does not fail the command. The report is printed as a table, as JSON,
/// or as plain `name: status` lines depending on `args.format`.
///
/// # Errors
/// * `CliError::config` when the preset is unknown or cannot be loaded;
/// * `CliError::Serialization` when the JSON report cannot be rendered.
#[cfg(feature = "emotion")]
async fn execute_validate(
    args: ValidateArgs,
    output_formatter: &OutputFormatter,
) -> std::result::Result<(), CliError> {
    if !preset_exists(&args.preset) {
        return Err(CliError::config(format!(
            "Emotion preset '{}' not found",
            args.preset
        )));
    }

    println!("Validating emotion preset '{}'...", args.preset);

    // Statuses are owned strings so rows can carry formatted scores and
    // error details instead of fixed labels.
    let mut validation_results: Vec<(&'static str, String)> = Vec::new();

    // Only the ability to load is checked; the loaded config is not inspected.
    load_emotion_preset(&args.preset)
        .map_err(|e| CliError::config(format!("Failed to load preset: {}", e)))?;
    validation_results.push(("Preset Loading", "✓ Valid".to_string()));

    // NOTE(review): parameters are looked up by preset name through
    // `get_emotion_parameters`, not read from the loaded preset config.
    let emotion_params = get_emotion_parameters(&args.preset);
    let range_status = |key: &str| -> String {
        let in_range = emotion_params
            .get(key)
            .and_then(|v| v.as_f64())
            .map(|v| (0.5..=2.0).contains(&v))
            .unwrap_or(false);
        let label = if in_range {
            "✓ Valid (0.5-2.0)"
        } else {
            "⚠ Out of range"
        };
        label.to_string()
    };
    validation_results.push(("Pitch Parameters", range_status("pitch_shift")));
    validation_results.push(("Tempo Parameters", range_status("tempo_scale")));

    match test_synthesis_with_preset(&args.preset, &args.text).await {
        Ok(quality_score) => {
            let naturalness = calculate_naturalness_score(quality_score);
            validation_results.push(("Synthesis Test", "✓ Successful".to_string()));
            validation_results.push((
                "Audio Quality",
                format!("✓ Score: {:.1}/10", quality_score),
            ));
            validation_results.push(("Naturalness Score", format!("✓ {:.1}/10", naturalness)));
        }
        Err(e) => {
            validation_results.push(("Synthesis Test", format!("✗ Failed: {}", e)));
            validation_results.push(("Audio Quality", "✗ Cannot assess".to_string()));
            validation_results.push(("Naturalness Score", "✗ Cannot assess".to_string()));
        }
    }

    match args.format.as_str() {
        "table" => {
            println!("{:<20} Status", "Parameter");
            println!("{}", "-".repeat(40));
            for (param, status) in validation_results {
                println!("{:<20} {}", param, status);
            }
        }
        "json" => {
            let json_results: Vec<_> = validation_results
                .into_iter()
                .map(|(param, status)| {
                    serde_json::json!({
                        "parameter": param,
                        "status": status
                    })
                })
                .collect();

            println!(
                "{}",
                serde_json::to_string_pretty(&json_results).map_err(CliError::Serialization)?
            );
        }
        _ => {
            for (param, status) in validation_results {
                println!("{}: {}", param, status);
            }
        }
    }

    output_formatter.success("Emotion preset validation completed!");
    Ok(())
}
522
/// Returns the built-in emotion catalogue.
///
/// Each entry is `(name, description, default intensity)`; the order of the
/// returned vector is the order of the table below.
fn get_available_emotions() -> Vec<(&'static str, &'static str, f32)> {
    const CATALOG: [(&str, &str, f32); 12] = [
        ("neutral", "Neutral emotional state", 1.0),
        ("happy", "Joyful and positive emotional state", 0.7),
        ("sad", "Melancholic and subdued emotional state", 0.6),
        ("angry", "Intense and aggressive emotional state", 0.8),
        ("fear", "Anxious and worried emotional state", 0.6),
        ("surprise", "Shocked and unexpected emotional state", 0.8),
        ("disgust", "Repulsed and negative emotional state", 0.7),
        ("calm", "Peaceful and relaxed emotional state", 0.5),
        ("excited", "Energetic and enthusiastic emotional state", 0.9),
        ("tender", "Gentle and affectionate emotional state", 0.6),
        ("confident", "Assured and self-confident emotional state", 0.7),
        ("melancholic", "Thoughtful and wistful emotional state", 0.5),
    ];

    CATALOG.to_vec()
}
544
545fn get_emotion_parameters(emotion: &str) -> serde_json::Value {
547 match emotion {
549 "happy" => serde_json::json!({
550 "pitch_shift": 1.1,
551 "tempo_scale": 1.05,
552 "energy_scale": 1.2,
553 "brightness": 0.15,
554 "roughness": -0.1
555 }),
556 "sad" => serde_json::json!({
557 "pitch_shift": 0.9,
558 "tempo_scale": 0.85,
559 "energy_scale": 0.7,
560 "brightness": -0.2,
561 "breathiness": 0.1
562 }),
563 _ => serde_json::json!({
564 "pitch_shift": 1.0,
565 "tempo_scale": 1.0,
566 "energy_scale": 1.0
567 }),
568 }
569}
570
571fn preset_exists(name: &str) -> bool {
573 if get_available_emotions()
575 .iter()
576 .any(|(emotion_name, _, _)| *emotion_name == name)
577 {
578 return true;
579 }
580
581 if let Ok(preset_dir) = get_preset_directory() {
583 let preset_path = preset_dir.join(format!("{}.json", name));
584 preset_path.exists()
585 } else {
586 false
587 }
588}
589
590fn get_preset_directory() -> std::result::Result<PathBuf, CliError> {
592 let config_dir = dirs::config_dir()
593 .ok_or_else(|| CliError::config("Unable to determine config directory"))?;
594 Ok(config_dir.join("voirs").join("emotion_presets"))
595}
596
/// Loads the `EmotionConfig` for a preset name.
///
/// Lookup order:
/// 1. a custom preset file `<preset directory>/<name>.json`, whose `config`
///    section is deserialized into an `EmotionConfig`;
/// 2. a built-in emotion name, which yields `EmotionConfig::default()` (the
///    values from `get_emotion_parameters` are not applied to it).
///
/// A failure to determine the preset directory is ignored so that built-in
/// names still resolve.
///
/// Gated on the `emotion` feature because `EmotionConfig` is only imported
/// when that feature is enabled.
///
/// # Errors
/// * `CliError::Io` when the preset file cannot be read;
/// * `CliError::config` when the file is not valid JSON, lacks a `config`
///   section, the section is not a valid `EmotionConfig`, or the name is
///   neither a custom preset nor a built-in emotion.
#[cfg(feature = "emotion")]
fn load_emotion_preset(name: &str) -> std::result::Result<EmotionConfig, CliError> {
    if let Ok(preset_dir) = get_preset_directory() {
        let preset_path = preset_dir.join(format!("{}.json", name));
        if preset_path.exists() {
            let content = fs::read_to_string(&preset_path).map_err(CliError::Io)?;
            let mut preset_data: serde_json::Value = serde_json::from_str(&content)
                .map_err(|e| CliError::config(format!("Invalid preset format: {}", e)))?;

            // Move the section out of the document instead of cloning it.
            let config = preset_data
                .get_mut("config")
                .map(serde_json::Value::take)
                .ok_or_else(|| CliError::config("Preset missing config section"))?;

            return serde_json::from_value(config)
                .map_err(|e| CliError::config(format!("Invalid emotion config: {}", e)));
        }
    }

    let is_builtin = get_available_emotions()
        .iter()
        .any(|(emotion_name, _, _)| *emotion_name == name);
    if is_builtin {
        return Ok(EmotionConfig::default());
    }

    Err(CliError::config(format!(
        "Emotion preset '{}' not found",
        name
    )))
}
638
/// Runs a short test synthesis for a preset and returns its quality score.
///
/// A medium-quality pipeline is built, `pitch_shift` / `tempo_scale` from
/// `get_emotion_parameters(preset_name)` are copied into a default
/// `SynthesisConfig`, and the (possibly truncated) text is synthesized. The
/// result is scored with `calculate_audio_quality_score`.
///
/// Text longer than 50 bytes is cut to at most 47 bytes, backing up to the
/// nearest character boundary, and suffixed with `...`.
///
/// # Errors
/// Returns `CliError::config` when the pipeline cannot be built or the
/// synthesis fails.
#[cfg(feature = "emotion")]
async fn test_synthesis_with_preset(
    preset_name: &str,
    text: &str,
) -> std::result::Result<f32, CliError> {
    let pipeline = VoirsPipelineBuilder::new()
        .with_quality(QualityLevel::Medium)
        .build()
        .await
        .map_err(|e| CliError::config(format!("Failed to create test pipeline: {}", e)))?;

    let mut synthesis_config = SynthesisConfig::default();
    let emotion_params = get_emotion_parameters(preset_name);

    // NOTE(review): `execute_blend` applies pitch as an offset from 1.0
    // (neutral = 0.0) while this assigns the raw factor; confirm which scale
    // `SynthesisConfig::pitch_shift` expects.
    if let Some(pitch_shift) = emotion_params.get("pitch_shift").and_then(|v| v.as_f64()) {
        synthesis_config.pitch_shift = pitch_shift as f32;
    }
    if let Some(tempo_scale) = emotion_params.get("tempo_scale").and_then(|v| v.as_f64()) {
        synthesis_config.speaking_rate = tempo_scale as f32;
    }

    let test_text = if text.len() > 50 {
        // Slicing at a fixed byte offset panics inside a multi-byte character,
        // so back up to the closest boundary. Offset 0 is always a boundary,
        // which bounds the loop.
        let mut cut = 47;
        while !text.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &text[..cut])
    } else {
        text.to_string()
    };

    let audio = pipeline
        .synthesize_with_config(&test_text, &synthesis_config)
        .await
        .map_err(|e| CliError::config(format!("Test synthesis failed: {}", e)))?;

    let quality_score = calculate_audio_quality_score(&audio);
    Ok(quality_score)
}
679
680fn calculate_audio_quality_score(audio: &AudioBuffer) -> f32 {
682 let samples = audio.samples();
684 if samples.is_empty() {
685 return 0.0;
686 }
687
688 let clipping_ratio = samples
690 .iter()
691 .filter(|&&sample| sample.abs() > 0.95)
692 .count() as f32
693 / samples.len() as f32;
694
695 let silence_ratio = samples
697 .iter()
698 .filter(|&&sample| sample.abs() < 0.01)
699 .count() as f32
700 / samples.len() as f32;
701
702 let rms = (samples.iter().map(|&s| s * s).sum::<f32>() / samples.len() as f32).sqrt();
704
705 let mut score = 10.0;
707 score -= clipping_ratio * 5.0; score -= if silence_ratio > 0.8 { 4.0 } else { 0.0 }; score -= if rms < 0.1 { 3.0 } else { 0.0 }; score.clamp(0.0, 10.0)
712}
713
/// Derives a naturalness score from an audio quality score.
///
/// Computes `quality_score * 0.8 + 1.0` and clamps the result to
/// `0.0..=10.0`.
fn calculate_naturalness_score(quality_score: f32) -> f32 {
    const QUALITY_WEIGHT: f32 = 0.8;
    const BASELINE: f32 = 1.0;
    const LOWEST: f32 = 0.0;
    const HIGHEST: f32 = 10.0;

    (quality_score * QUALITY_WEIGHT + BASELINE).clamp(LOWEST, HIGHEST)
}