1use crate::{error::CliError, output::OutputFormatter};
4use clap::{Args, Subcommand};
5use hound;
6use std::path::PathBuf;
7#[cfg(feature = "conversion")]
8use voirs_conversion::{
9 AgeTransform, ConversionConfig, ConversionRequest, ConversionTarget, ConversionType,
10 GenderTransform, PitchTransform, VoiceCharacteristics, VoiceConverter,
11};
12
/// Voice conversion subcommands.
///
/// Each variant carries the argument struct for one operation and is
/// dispatched by `execute_conversion_command`. The doc comment on each variant
/// doubles as the help text clap shows for that subcommand.
#[cfg(feature = "conversion")]
#[derive(Debug, Clone, Subcommand)]
pub enum ConversionCommand {
    /// Convert a recording toward a target speaker profile
    Speaker(SpeakerArgs),
    /// Transform the apparent age of a voice
    Age(AgeArgs),
    /// Transform the apparent gender of a voice
    Gender(GenderArgs),
    /// Morph a recording toward a blend of two voice profiles
    Morph(MorphArgs),
    /// Run the real-time (streaming) conversion demonstration
    Stream(StreamArgs),
    /// List the available conversion models
    ListModels(ListModelsArgs),
}
30
31#[derive(Debug, Clone, Args)]
32pub struct SpeakerArgs {
33 pub input: PathBuf,
35 #[arg(long)]
37 pub target_speaker: String,
38 pub output: PathBuf,
40 #[arg(long, default_value = "1.0")]
42 pub strength: f32,
43 #[arg(long, default_value = "22050")]
45 pub sample_rate: u32,
46}
47
48#[derive(Debug, Clone, Args)]
49pub struct AgeArgs {
50 pub input: PathBuf,
52 #[arg(long)]
54 pub target_age: u32,
55 pub output: PathBuf,
57 #[arg(long, default_value = "1.0")]
59 pub strength: f32,
60 #[arg(long, default_value = "22050")]
62 pub sample_rate: u32,
63}
64
65#[derive(Debug, Clone, Args)]
66pub struct GenderArgs {
67 pub input: PathBuf,
69 #[arg(long)]
71 pub target_gender: String,
72 pub output: PathBuf,
74 #[arg(long, default_value = "1.0")]
76 pub strength: f32,
77 #[arg(long, default_value = "22050")]
79 pub sample_rate: u32,
80}
81
82#[derive(Debug, Clone, Args)]
83pub struct MorphArgs {
84 pub input: PathBuf,
86 #[arg(long)]
88 pub voice1: String,
89 #[arg(long)]
91 pub voice2: String,
92 #[arg(long, default_value = "0.5")]
94 pub ratio: f32,
95 pub output: PathBuf,
97 #[arg(long, default_value = "22050")]
99 pub sample_rate: u32,
100}
101
102#[derive(Debug, Clone, Args)]
103pub struct StreamArgs {
104 #[arg(long, default_value = "mic")]
106 pub input: String,
107 #[arg(long)]
109 pub target: String,
110 #[arg(long, default_value = "speaker")]
112 pub output: String,
113 #[arg(long, default_value = "100")]
115 pub buffer_ms: u32,
116 #[arg(long)]
118 pub monitor: bool,
119}
120
121#[derive(Debug, Clone, Args)]
122pub struct ListModelsArgs {
123 #[arg(long, default_value = "table")]
125 pub format: String,
126 #[arg(long)]
128 pub detailed: bool,
129}
130
/// Entry point for all voice conversion subcommands.
///
/// Forwards the parsed `command` to the matching handler and returns that
/// handler's result unchanged.
///
/// # Errors
///
/// Propagates whatever `CliError` the selected handler returns.
#[cfg(feature = "conversion")]
pub async fn execute_conversion_command(
    command: ConversionCommand,
    output_formatter: &OutputFormatter,
) -> Result<(), CliError> {
    use ConversionCommand as Cmd;

    match command {
        Cmd::Speaker(speaker) => execute_speaker_conversion(speaker, output_formatter).await,
        Cmd::Age(age) => execute_age_conversion(age, output_formatter).await,
        Cmd::Gender(gender) => execute_gender_conversion(gender, output_formatter).await,
        Cmd::Morph(morph) => execute_morph_conversion(morph, output_formatter).await,
        Cmd::Stream(stream) => execute_stream_conversion(stream, output_formatter).await,
        Cmd::ListModels(list) => execute_list_models(list, output_formatter).await,
    }
}
148
/// Converts the input recording toward a target speaker profile and writes
/// the result to `args.output`.
///
/// # Errors
///
/// * `CliError::config` if the input file does not exist or the voice
///   converter cannot be created.
/// * `CliError::invalid_parameter` if `strength` is not within `0.0..=1.0`
///   (NaN included).
/// * `CliError::AudioError` if loading, converting or saving the audio fails.
#[cfg(feature = "conversion")]
async fn execute_speaker_conversion(
    args: SpeakerArgs,
    output_formatter: &OutputFormatter,
) -> Result<(), CliError> {
    if !args.input.exists() {
        return Err(CliError::config(format!(
            "Input file not found: {}",
            args.input.display()
        )));
    }

    // `contains` is false for NaN, whereas `x < 0.0 || x > 1.0` would let a
    // `--strength NaN` slip through validation.
    if !(0.0..=1.0).contains(&args.strength) {
        return Err(CliError::invalid_parameter(
            "strength",
            "Conversion strength must be between 0.0 and 1.0",
        ));
    }

    println!("Converting speaker characteristics...");
    println!("  Input: {}", args.input.display());
    println!("  Target speaker: {}", args.target_speaker);
    println!("  Strength: {:.2}", args.strength);

    let audio_data = load_audio_file(&args.input)
        .map_err(|e| CliError::AudioError(format!("Failed to load input audio: {}", e)))?;

    let converter = VoiceConverter::new()
        .map_err(|e| CliError::config(format!("Failed to create voice converter: {}", e)))?;

    let target = ConversionTarget::new(create_speaker_characteristics(&args.target_speaker)?)
        .with_strength(args.strength);

    let request = ConversionRequest::new(
        format!("speaker_conv_{}", fastrand::u64(..)),
        audio_data.samples,
        audio_data.sample_rate,
        ConversionType::SpeakerConversion,
        target,
    );

    let result = converter
        .convert(request)
        .await
        .map_err(|e| CliError::AudioError(format!("Speaker conversion failed: {}", e)))?;

    if !result.success {
        let error_msg = result
            .error_message
            .unwrap_or_else(|| "Unknown error".to_string());
        return Err(CliError::AudioError(format!(
            "Speaker conversion failed: {}",
            error_msg
        )));
    }

    // NOTE(review): the file is written with `--sample-rate`, not the rate of
    // the loaded input, and nothing here resamples — confirm the converter's
    // output rate matches what the user passes.
    save_audio_file(&result.converted_audio, args.sample_rate, &args.output)
        .map_err(|e| CliError::AudioError(format!("Failed to save converted audio: {}", e)))?;

    let quality_score = result
        .quality_metrics
        .get("overall_quality")
        .copied()
        .unwrap_or(0.0);
    output_formatter.success(&format!(
        "Speaker conversion completed! Quality score: {:.2}, Output saved to: {}",
        quality_score,
        args.output.display()
    ));

    Ok(())
}
224
/// Transforms the apparent age of the voice in the input recording and
/// writes the result to `args.output`.
///
/// # Errors
///
/// * `CliError::config` if the input file does not exist or the voice
///   converter cannot be created.
/// * `CliError::invalid_parameter` if `strength` is not within `0.0..=1.0`
///   (NaN included) or `target_age` is not within `5..=100`.
/// * `CliError::AudioError` if loading, converting or saving the audio fails.
#[cfg(feature = "conversion")]
async fn execute_age_conversion(
    args: AgeArgs,
    output_formatter: &OutputFormatter,
) -> Result<(), CliError> {
    if !args.input.exists() {
        return Err(CliError::config(format!(
            "Input file not found: {}",
            args.input.display()
        )));
    }

    // `contains` is false for NaN, whereas `x < 0.0 || x > 1.0` would let a
    // `--strength NaN` slip through validation.
    if !(0.0..=1.0).contains(&args.strength) {
        return Err(CliError::invalid_parameter(
            "strength",
            "Conversion strength must be between 0.0 and 1.0",
        ));
    }

    if !(5..=100).contains(&args.target_age) {
        return Err(CliError::invalid_parameter(
            "target_age",
            "Target age must be between 5 and 100",
        ));
    }

    println!("Converting age characteristics...");
    println!("  Input: {}", args.input.display());
    println!("  Target age: {} years", args.target_age);
    println!("  Strength: {:.2}", args.strength);

    let audio_data = load_audio_file(&args.input)
        .map_err(|e| CliError::AudioError(format!("Failed to load input audio: {}", e)))?;

    let converter = VoiceConverter::new()
        .map_err(|e| CliError::config(format!("Failed to create voice converter: {}", e)))?;

    let target = ConversionTarget::new(create_age_characteristics(args.target_age)?)
        .with_strength(args.strength);

    let request = ConversionRequest::new(
        format!("age_conv_{}", fastrand::u64(..)),
        audio_data.samples,
        audio_data.sample_rate,
        ConversionType::AgeTransformation,
        target,
    );

    let result = converter
        .convert(request)
        .await
        .map_err(|e| CliError::AudioError(format!("Age conversion failed: {}", e)))?;

    if !result.success {
        let error_msg = result
            .error_message
            .unwrap_or_else(|| "Unknown error".to_string());
        return Err(CliError::AudioError(format!(
            "Age conversion failed: {}",
            error_msg
        )));
    }

    // NOTE(review): the file is written with `--sample-rate`, not the rate of
    // the loaded input, and nothing here resamples — confirm the converter's
    // output rate matches what the user passes.
    save_audio_file(&result.converted_audio, args.sample_rate, &args.output)
        .map_err(|e| CliError::AudioError(format!("Failed to save converted audio: {}", e)))?;

    let quality_score = result
        .quality_metrics
        .get("overall_quality")
        .copied()
        .unwrap_or(0.0);
    output_formatter.success(&format!(
        "Age conversion completed! Quality score: {:.2}, Output saved to: {}",
        quality_score,
        args.output.display()
    ));

    Ok(())
}
310
/// Transforms the apparent gender of the voice in the input recording and
/// writes the result to `args.output`.
///
/// `target_gender` is matched case-insensitively against `male`/`m`,
/// `female`/`f` and `neutral`/`n`.
///
/// # Errors
///
/// * `CliError::config` if the input file does not exist or the voice
///   converter cannot be created.
/// * `CliError::invalid_parameter` if `strength` is not within `0.0..=1.0`
///   (NaN included) or `target_gender` is not one of the accepted values.
/// * `CliError::AudioError` if loading, converting or saving the audio fails.
#[cfg(feature = "conversion")]
async fn execute_gender_conversion(
    args: GenderArgs,
    output_formatter: &OutputFormatter,
) -> Result<(), CliError> {
    if !args.input.exists() {
        return Err(CliError::config(format!(
            "Input file not found: {}",
            args.input.display()
        )));
    }

    // `contains` is false for NaN, whereas `x < 0.0 || x > 1.0` would let a
    // `--strength NaN` slip through validation.
    if !(0.0..=1.0).contains(&args.strength) {
        return Err(CliError::invalid_parameter(
            "strength",
            "Conversion strength must be between 0.0 and 1.0",
        ));
    }

    // Normalize short forms and casing to one canonical spelling.
    let target_gender = match args.target_gender.to_lowercase().as_str() {
        "male" | "m" => "male",
        "female" | "f" => "female",
        "neutral" | "n" => "neutral",
        _ => {
            return Err(CliError::invalid_parameter(
                "target_gender",
                "Target gender must be 'male', 'female', or 'neutral'",
            ))
        }
    };

    println!("Converting gender characteristics...");
    println!("  Input: {}", args.input.display());
    println!("  Target gender: {}", target_gender);
    println!("  Strength: {:.2}", args.strength);

    let audio_data = load_audio_file(&args.input)
        .map_err(|e| CliError::AudioError(format!("Failed to load input audio: {}", e)))?;

    let converter = VoiceConverter::new()
        .map_err(|e| CliError::config(format!("Failed to create voice converter: {}", e)))?;

    let target = ConversionTarget::new(create_gender_characteristics(target_gender)?)
        .with_strength(args.strength);

    let request = ConversionRequest::new(
        format!("gender_conv_{}", fastrand::u64(..)),
        audio_data.samples,
        audio_data.sample_rate,
        ConversionType::GenderTransformation,
        target,
    );

    let result = converter
        .convert(request)
        .await
        .map_err(|e| CliError::AudioError(format!("Gender conversion failed: {}", e)))?;

    if !result.success {
        let error_msg = result
            .error_message
            .unwrap_or_else(|| "Unknown error".to_string());
        return Err(CliError::AudioError(format!(
            "Gender conversion failed: {}",
            error_msg
        )));
    }

    // NOTE(review): the file is written with `--sample-rate`, not the rate of
    // the loaded input, and nothing here resamples — confirm the converter's
    // output rate matches what the user passes.
    save_audio_file(&result.converted_audio, args.sample_rate, &args.output)
        .map_err(|e| CliError::AudioError(format!("Failed to save converted audio: {}", e)))?;

    let quality_score = result
        .quality_metrics
        .get("overall_quality")
        .copied()
        .unwrap_or(0.0);
    output_formatter.success(&format!(
        "Gender conversion completed! Quality score: {:.2}, Output saved to: {}",
        quality_score,
        args.output.display()
    ));

    Ok(())
}
398
/// Morphs the input recording toward a blend of two voice profiles and
/// writes the result to `args.output`.
///
/// The target is built by interpolating the characteristics of `voice1` and
/// `voice2` with `ratio`.
///
/// # Errors
///
/// * `CliError::config` if the input file does not exist or the voice
///   converter cannot be created.
/// * `CliError::invalid_parameter` if `ratio` is not within `0.0..=1.0`
///   (NaN included).
/// * `CliError::AudioError` if loading, converting or saving the audio fails.
#[cfg(feature = "conversion")]
async fn execute_morph_conversion(
    args: MorphArgs,
    output_formatter: &OutputFormatter,
) -> Result<(), CliError> {
    if !args.input.exists() {
        return Err(CliError::config(format!(
            "Input file not found: {}",
            args.input.display()
        )));
    }

    // `contains` is false for NaN, whereas `x < 0.0 || x > 1.0` would let a
    // `--ratio NaN` slip through validation.
    if !(0.0..=1.0).contains(&args.ratio) {
        return Err(CliError::invalid_parameter(
            "ratio",
            "Morphing ratio must be between 0.0 and 1.0",
        ));
    }

    println!("Morphing between voice characteristics...");
    println!("  Input: {}", args.input.display());
    println!("  Voice 1: {}", args.voice1);
    println!("  Voice 2: {}", args.voice2);
    println!("  Ratio: {:.2}", args.ratio);

    let audio_data = load_audio_file(&args.input)
        .map_err(|e| CliError::AudioError(format!("Failed to load input audio: {}", e)))?;

    let converter = VoiceConverter::new()
        .map_err(|e| CliError::config(format!("Failed to create voice converter: {}", e)))?;

    let voice1_chars = create_speaker_characteristics(&args.voice1)?;
    let voice2_chars = create_speaker_characteristics(&args.voice2)?;
    let morphed_chars = voice1_chars.interpolate(&voice2_chars, args.ratio);
    let target = ConversionTarget::new(morphed_chars);

    let request = ConversionRequest::new(
        format!("morph_conv_{}", fastrand::u64(..)),
        audio_data.samples,
        audio_data.sample_rate,
        ConversionType::VoiceMorphing,
        target,
    );

    let result = converter
        .convert(request)
        .await
        .map_err(|e| CliError::AudioError(format!("Voice morphing failed: {}", e)))?;

    if !result.success {
        let error_msg = result
            .error_message
            .unwrap_or_else(|| "Unknown error".to_string());
        return Err(CliError::AudioError(format!(
            "Voice morphing failed: {}",
            error_msg
        )));
    }

    // NOTE(review): the file is written with `--sample-rate`, not the rate of
    // the loaded input, and nothing here resamples — confirm the converter's
    // output rate matches what the user passes.
    save_audio_file(&result.converted_audio, args.sample_rate, &args.output)
        .map_err(|e| CliError::AudioError(format!("Failed to save morphed audio: {}", e)))?;

    let quality_score = result
        .quality_metrics
        .get("overall_quality")
        .copied()
        .unwrap_or(0.0);
    output_formatter.success(&format!(
        "Voice morphing completed! Quality score: {:.2}, Output saved to: {}",
        quality_score,
        args.output.display()
    ));

    Ok(())
}
477
/// Runs the real-time conversion demonstration.
///
/// No audio device is opened: after validating `buffer_ms` the function
/// prints ten simulated chunks, sleeping `buffer_ms` milliseconds per chunk,
/// and reports success through `output_formatter`.
///
/// # Errors
///
/// * `CliError::invalid_parameter` if `buffer_ms` is outside `50..=1000`.
/// * `CliError::config` if the voice converter cannot be created.
#[cfg(feature = "conversion")]
async fn execute_stream_conversion(
    args: StreamArgs,
    output_formatter: &OutputFormatter,
) -> Result<(), CliError> {
    if !(50..=1000).contains(&args.buffer_ms) {
        return Err(CliError::invalid_parameter(
            "buffer_ms",
            "Buffer size must be between 50 and 1000 milliseconds",
        ));
    }

    println!("Starting real-time voice conversion...");
    println!("  Input: {}", args.input);
    println!("  Target: {}", args.target);
    println!("  Output: {}", args.output);
    println!("  Buffer: {}ms", args.buffer_ms);

    // Both values are constructed so that failures surface here; the
    // simulation below never reads them.
    let _converter = VoiceConverter::new()
        .map_err(|e| CliError::config(format!("Failed to create voice converter: {}", e)))?;
    let _target = ConversionTarget::new(create_speaker_characteristics(&args.target)?);

    if args.monitor {
        println!("Monitoring enabled. Press Ctrl+C to stop.");
    }

    println!("Real-time conversion started...");
    println!("Note: This is a simplified demonstration. Full streaming implementation requires audio device integration.");

    for chunk in 1..=10 {
        println!("Processing chunk {}/10...", chunk);
        tokio::time::sleep(tokio::time::Duration::from_millis(args.buffer_ms as u64)).await;

        if args.monitor {
            println!(
                "  Chunk {}: Quality OK, Latency: {}ms",
                chunk, args.buffer_ms
            );
        }
    }

    output_formatter.success("Streaming conversion simulation completed successfully!");
    Ok(())
}
528
/// Prints the available conversion models in the requested format.
///
/// `args.format` selects the layout: `"table"` prints aligned columns,
/// `"json"` prints a pretty-printed JSON array (with a `details` object per
/// model when `args.detailed` is set, `null` otherwise), and any other value
/// prints one `id: description (type)` line per model.
///
/// # Errors
///
/// * `CliError::config` if the voice converter cannot be created.
/// * `CliError::Serialization` if the JSON output cannot be serialized.
#[cfg(feature = "conversion")]
async fn execute_list_models(
    args: ListModelsArgs,
    output_formatter: &OutputFormatter,
) -> Result<(), CliError> {
    // Constructed only so that a broken converter setup is reported; the
    // listing itself does not consult it.
    let _converter = VoiceConverter::new()
        .map_err(|e| CliError::config(format!("Failed to create voice converter: {}", e)))?;

    let models = get_available_conversion_models();
    if models.is_empty() {
        println!("No conversion models found.");
        return Ok(());
    }

    let format = args.format.as_str();
    if format == "table" {
        println!(
            "{:<20} {:<15} {:<30} Version",
            "Model ID", "Type", "Description"
        );
        println!("{}", "-".repeat(80));
        for entry in &models {
            println!(
                "{:<20} {:<15} {:<30} {}",
                entry.id, entry.model_type, entry.description, entry.version
            );
        }
    } else if format == "json" {
        let mut json_models = Vec::with_capacity(models.len());
        for entry in &models {
            // `None` serializes as JSON `null`, matching the non-detailed output.
            let details = args.detailed.then(|| {
                serde_json::json!({
                    "supported_formats": entry.supported_formats,
                    "latency_ms": entry.latency_ms
                })
            });
            json_models.push(serde_json::json!({
                "id": entry.id,
                "type": entry.model_type,
                "description": entry.description,
                "version": entry.version,
                "details": details
            }));
        }

        let rendered =
            serde_json::to_string_pretty(&json_models).map_err(CliError::Serialization)?;
        println!("{}", rendered);
    } else {
        for entry in &models {
            println!("{}: {} ({})", entry.id, entry.description, entry.model_type);
        }
    }

    Ok(())
}
594
/// Decoded audio as returned by `load_audio_file`.
#[derive(Debug)]
struct AudioData {
    /// Sample values as 32-bit floats.
    samples: Vec<f32>,
    /// Sample rate in Hz, taken from the source file's header.
    sample_rate: u32,
}
601
/// Description of one conversion model, as listed by `execute_list_models`.
#[derive(Debug)]
struct ConversionModelInfo {
    /// Model identifier, e.g. `speaker_conv_v1`.
    id: String,
    /// Model category, e.g. `Speaker` or `Realtime`.
    model_type: String,
    /// Short human-readable description.
    description: String,
    /// Version string.
    version: String,
    /// Names of the audio formats the model supports.
    supported_formats: Vec<String>,
    /// Latency figure in milliseconds.
    latency_ms: u32,
}
612
613fn load_audio_file(path: &PathBuf) -> Result<AudioData, Box<dyn std::error::Error>> {
615 let mut reader = hound::WavReader::open(path)?;
616 let spec = reader.spec();
617
618 let samples: Result<Vec<f32>, _> = match spec.sample_format {
620 hound::SampleFormat::Float => reader.samples::<f32>().collect(),
621 hound::SampleFormat::Int => match spec.bits_per_sample {
622 16 => reader
623 .samples::<i16>()
624 .map(|s| s.map(|sample| sample as f32 / i16::MAX as f32))
625 .collect(),
626 24 => reader
627 .samples::<i32>()
628 .map(|s| s.map(|sample| (sample >> 8) as f32 / (i32::MAX >> 8) as f32))
629 .collect(),
630 32 => reader
631 .samples::<i32>()
632 .map(|s| s.map(|sample| sample as f32 / i32::MAX as f32))
633 .collect(),
634 _ => {
635 return Err(format!("Unsupported bit depth: {}", spec.bits_per_sample).into());
636 }
637 },
638 };
639
640 let samples = samples?;
641
642 let mono_samples = if spec.channels == 2 {
644 samples
645 .chunks(2)
646 .map(|frame| (frame[0] + frame[1]) / 2.0)
647 .collect()
648 } else {
649 samples
650 };
651
652 Ok(AudioData {
653 samples: mono_samples,
654 sample_rate: spec.sample_rate,
655 })
656}
657
658fn save_audio_file(
660 audio_data: &[f32],
661 sample_rate: u32,
662 path: &PathBuf,
663) -> Result<(), Box<dyn std::error::Error>> {
664 let spec = hound::WavSpec {
665 channels: 1,
666 sample_rate,
667 bits_per_sample: 16,
668 sample_format: hound::SampleFormat::Int,
669 };
670
671 let mut writer = hound::WavWriter::create(path, spec)?;
672
673 for &sample in audio_data {
674 let sample_i16 = (sample * i16::MAX as f32) as i16;
675 writer.write_sample(sample_i16)?;
676 }
677
678 writer.finalize()?;
679 Ok(())
680}
681
/// Builds voice characteristics for a named speaker profile.
///
/// Recognized profiles (case-insensitive) are `young_male`, `young_female`,
/// `adult_male`, `adult_female`, `elderly_male` and `elderly_female`; each is
/// an even (0.5) interpolation between the characteristics of its age group
/// and of its gender. Any other ID yields `VoiceCharacteristics::default()`.
///
/// Gated on the `conversion` feature because `VoiceCharacteristics` is only
/// imported when that feature is enabled.
///
/// # Errors
///
/// Currently never returns an error.
#[cfg(feature = "conversion")]
fn create_speaker_characteristics(speaker_id: &str) -> Result<VoiceCharacteristics, CliError> {
    use voirs_conversion::types::{AgeGroup, Gender};

    let (age_group, gender) = match speaker_id.to_lowercase().as_str() {
        "young_male" => (AgeGroup::YoungAdult, Gender::Male),
        "young_female" => (AgeGroup::YoungAdult, Gender::Female),
        "adult_male" => (AgeGroup::MiddleAged, Gender::Male),
        "adult_female" => (AgeGroup::MiddleAged, Gender::Female),
        "elderly_male" => (AgeGroup::Senior, Gender::Male),
        "elderly_female" => (AgeGroup::Senior, Gender::Female),
        // Custom / unknown speaker IDs fall back to the default characteristics.
        _ => return Ok(VoiceCharacteristics::default()),
    };

    Ok(VoiceCharacteristics::for_age(age_group)
        .interpolate(&VoiceCharacteristics::for_gender(gender), 0.5))
}

/// Builds voice characteristics for a target age in years.
///
/// Ages map to groups as follows: 0-12 child, 13-19 teen, 20-35 young adult,
/// 36-55 middle-aged, anything above senior.
///
/// Gated on the `conversion` feature because `VoiceCharacteristics` is only
/// imported when that feature is enabled.
///
/// # Errors
///
/// Currently never returns an error.
#[cfg(feature = "conversion")]
fn create_age_characteristics(age: u32) -> Result<VoiceCharacteristics, CliError> {
    use voirs_conversion::types::AgeGroup;

    let age_group = match age {
        0..=12 => AgeGroup::Child,
        13..=19 => AgeGroup::Teen,
        20..=35 => AgeGroup::YoungAdult,
        36..=55 => AgeGroup::MiddleAged,
        _ => AgeGroup::Senior,
    };

    Ok(VoiceCharacteristics::for_age(age_group))
}

/// Builds voice characteristics for a target gender name.
///
/// `male`/`m` and `female`/`f` (case-insensitive) map to the matching gender,
/// `other` maps to `Gender::Other`, and every other string maps to
/// `Gender::Unknown`.
///
/// Gated on the `conversion` feature because `VoiceCharacteristics` is only
/// imported when that feature is enabled.
///
/// # Errors
///
/// Currently never returns an error.
#[cfg(feature = "conversion")]
fn create_gender_characteristics(gender: &str) -> Result<VoiceCharacteristics, CliError> {
    use voirs_conversion::types::Gender;

    // NOTE(review): the gender subcommand passes "neutral" for a neutral
    // target, which lands in the `Unknown` arm here, while "other" is never
    // passed by that caller — confirm which variant "neutral" should use.
    let gender_type = match gender.to_lowercase().as_str() {
        "male" | "m" => Gender::Male,
        "female" | "f" => Gender::Female,
        "other" => Gender::Other,
        _ => Gender::Unknown,
    };

    Ok(VoiceCharacteristics::for_gender(gender_type))
}
735
736fn get_available_conversion_models() -> Vec<ConversionModelInfo> {
738 vec![
739 ConversionModelInfo {
740 id: "speaker_conv_v1".to_string(),
741 model_type: "Speaker".to_string(),
742 description: "General purpose speaker conversion".to_string(),
743 version: "1.0.0".to_string(),
744 supported_formats: vec!["wav".to_string(), "mp3".to_string()],
745 latency_ms: 100,
746 },
747 ConversionModelInfo {
748 id: "age_conv_v1".to_string(),
749 model_type: "Age".to_string(),
750 description: "Age transformation model".to_string(),
751 version: "1.0.0".to_string(),
752 supported_formats: vec!["wav".to_string()],
753 latency_ms: 120,
754 },
755 ConversionModelInfo {
756 id: "gender_conv_v1".to_string(),
757 model_type: "Gender".to_string(),
758 description: "Gender transformation model".to_string(),
759 version: "1.0.0".to_string(),
760 supported_formats: vec!["wav".to_string()],
761 latency_ms: 110,
762 },
763 ConversionModelInfo {
764 id: "realtime_conv_v1".to_string(),
765 model_type: "Realtime".to_string(),
766 description: "Low-latency real-time conversion".to_string(),
767 version: "1.0.0".to_string(),
768 supported_formats: vec!["wav".to_string(), "raw".to_string()],
769 latency_ms: 50,
770 },
771 ]
772}