use voxudio::*;
/// Example: convert the tone color of one recording towards another speaker
/// and play the result.
///
/// Steps, in order:
/// 1. Build the voice activity detector, speaker embedding extractor and tone
///    color converter from ONNX files under `../checkpoint/`.
/// 2. For the source clip (`../asset/test6.wav`) and the target clip
///    (`../asset/bajie.mp3`): load the audio, keep only the speech parts, and
///    extract a speaker embedding from that speech-only audio.
/// 3. Convert the *unfiltered* source audio using both embeddings and print
///    the cost value reported by the converter.
/// 4. Write the converted audio to an `AudioPlayer`.
///
/// NOTE(review): the `22050` const argument is presumably the sample rate in
/// Hz shared by every stage — confirm against the `voxudio` API.
///
/// # Errors
///
/// Any failure from model loading, audio loading, inference or playback is
/// propagated to the caller through `?`.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Models are created up front, in the same order they are later used.
    let mut detector = VoiceActivityDetector::new("../checkpoint/voice_activity_detector.onnx")?;
    let mut embedder =
        SpeakerEmbeddingExtractor::new("../checkpoint/speaker_embedding_extractor.onnx")?;
    let mut converter = ToneColorConverter::new("../checkpoint/tone_color_converter.onnx")?;

    // Source clip: the audio whose tone color will be converted.
    let (source_samples, source_channels) =
        load_audio::<22050, f32, _>("../asset/test6.wav", false).await?;
    let source_speech = detector
        .retain_speech_only::<22050>(&source_samples, source_channels)
        .await?;
    let source_embedding = embedder.extract(&source_speech, source_channels).await?;

    // Target clip: only its speaker embedding is needed afterwards.
    let (target_samples, target_channels) =
        load_audio::<22050, f32, _>("../asset/bajie.mp3", false).await?;
    let target_speech = detector
        .retain_speech_only::<22050>(&target_samples, target_channels)
        .await?;
    let target_embedding = embedder.extract(&target_speech, target_channels).await?;

    // The converter receives the full source audio, not the speech-only version.
    let conversion = converter
        .convert(&source_samples, &source_embedding, &target_embedding)
        .await?;
    let (converted_samples, converted_channels, elapsed) = conversion;
    println!("Convert cost: {:?}", elapsed);

    // Start the player before handing it the converted samples.
    let mut player = AudioPlayer::new()?;
    player.play()?;
    player
        .write::<22050, f32>(&converted_samples, converted_channels)
        .await?;

    Ok(())
}