use {std::f32::consts::PI, voxudio::*};
/// Checks that `ToneColorConverter::new` succeeds for the checkpoint path
/// `../checkpoint/tone_color_converter.onnx`.
///
/// Only the success of construction is asserted; the converter is not used.
#[tokio::test]
async fn test_tcc_creation() {
    let creation = ToneColorConverter::new("../checkpoint/tone_color_converter.onnx");
    assert!(
        creation.is_ok(),
        "Should be able to create ToneColorConverter"
    );
}
/// Converts a synthetic three-second signal using all-zero feature vectors
/// and sanity-checks the result.
///
/// The test passes when the converted audio (`.0` of the value returned by
/// `convert`) is within 1000 samples of the input length and is not entirely
/// zero.
///
/// # Errors
///
/// Propagates any error from constructing the converter or from `convert`.
#[tokio::test]
async fn test_tcc_process() -> anyhow::Result<()> {
    // One sample of the test signal at time `t`: the fundamental `f0` plus its
    // second and third harmonics, scaled by a slow sinusoidal envelope.
    fn harmonic_sample(f0: f32, t: f32) -> f32 {
        let sample = 0.5 * (2.0 * PI * f0 * t).sin()
            + 0.3 * (2.0 * PI * f0 * 2.0 * t).sin()
            + 0.2 * (2.0 * PI * f0 * 3.0 * t).sin();
        let envelope = 0.5 + 0.5 * (2.0 * PI * 0.5 * t).sin();
        sample * envelope * 0.5
    }

    let mut tcc = ToneColorConverter::new("../checkpoint/tone_color_converter.onnx")?;

    let sample_rate = 16000;
    let duration_secs = 3.0;
    let num_samples = (sample_rate as f32 * duration_secs) as usize;
    let source_audio: Vec<f32> = (0..num_samples)
        .map(|i| harmonic_sample(120.0, i as f32 / sample_rate as f32))
        .collect();

    let source_features = [[0.0f32; 256]];
    let target_features = [[0.0f32; 256]];
    let converted_audio = tcc
        .convert(&source_audio, &source_features, &target_features)
        .await?;

    // Compare lengths as `usize`: an `as i32` cast would silently truncate or
    // wrap for very large lengths and could mask a badly wrong output size.
    let length_gap = converted_audio.0.len().abs_diff(source_audio.len());
    assert!(
        length_gap < 1000,
        "Converted audio length should be close to source length"
    );

    let sum: f32 = converted_audio.0.iter().map(|&x| x.abs()).sum();
    assert!(sum > 0.0, "Converted audio shouldn't be all zeros");
    Ok(())
}
/// Converts a synthetic source signal using feature vectors produced by
/// `SpeakerEmbeddingExtractor::extract` rather than hand-written ones.
///
/// Two three-second signals are synthesised with the same formula but
/// different fundamentals (120.0 for the source, 220.0 for the target).
/// Features are extracted from each and passed to `convert` together with the
/// source audio. The test passes when the converted audio is within 1000
/// samples of the source length and is not entirely zero.
///
/// # Errors
///
/// Propagates any error from constructing either model, from `extract`, or
/// from `convert`.
#[tokio::test]
async fn test_tcc_with_real_features() -> anyhow::Result<()> {
    // One sample of the test signal at time `t`: the fundamental `f0` plus its
    // second and third harmonics, scaled by a slow sinusoidal envelope.
    fn harmonic_sample(f0: f32, t: f32) -> f32 {
        let sample = 0.5 * (2.0 * PI * f0 * t).sin()
            + 0.3 * (2.0 * PI * f0 * 2.0 * t).sin()
            + 0.2 * (2.0 * PI * f0 * 3.0 * t).sin();
        let envelope = 0.5 + 0.5 * (2.0 * PI * 0.5 * t).sin();
        sample * envelope * 0.5
    }

    let mut tcc = ToneColorConverter::new("../checkpoint/tone_color_converter.onnx")?;
    let mut see = SpeakerEmbeddingExtractor::new("../checkpoint/speaker_embedding_extractor.onnx")?;

    let sample_rate = 16000;
    let duration_secs = 3.0;
    let num_samples = (sample_rate as f32 * duration_secs) as usize;

    let source_audio: Vec<f32> = (0..num_samples)
        .map(|i| harmonic_sample(120.0, i as f32 / sample_rate as f32))
        .collect();
    let target_audio: Vec<f32> = (0..num_samples)
        .map(|i| harmonic_sample(220.0, i as f32 / sample_rate as f32))
        .collect();

    // The second argument (`1`) is forwarded as in the original test; its
    // meaning is defined by `extract` and is not visible from this file.
    let source_features = see.extract(&source_audio, 1).await?;
    let target_features = see.extract(&target_audio, 1).await?;
    let converted_audio = tcc
        .convert(&source_audio, &source_features, &target_features)
        .await?;

    // Compare lengths as `usize`: an `as i32` cast would silently truncate or
    // wrap for very large lengths and could mask a badly wrong output size.
    let length_gap = converted_audio.0.len().abs_diff(source_audio.len());
    assert!(
        length_gap < 1000,
        "Converted audio length should be close to source length"
    );

    let sum: f32 = converted_audio.0.iter().map(|&x| x.abs()).sum();
    assert!(sum > 0.0, "Converted audio shouldn't be all zeros");
    Ok(())
}
/// Expects `convert` to return an error when given an empty audio buffer.
///
/// Both feature arguments are a `Vec` holding a single all-zero
/// `[f32; 256]` entry.
///
/// # Errors
///
/// Propagates any error from constructing the converter. An error from
/// `convert` itself is the expected outcome and is asserted, not propagated.
#[tokio::test]
async fn test_tcc_empty_input() -> anyhow::Result<()> {
    let mut tcc = ToneColorConverter::new("../checkpoint/tone_color_converter.onnx")?;

    let empty_audio: Vec<f32> = Vec::new();
    // One zeroed 256-wide feature row each for source and target.
    let source_features_array: Vec<[f32; 256]> = vec![[0.0; 256]];
    let target_features_array: Vec<[f32; 256]> = vec![[0.0; 256]];

    let outcome = tcc
        .convert(&empty_audio, &source_features_array, &target_features_array)
        .await;
    assert!(
        outcome.is_err(),
        "Processing empty input should return error"
    );
    Ok(())
}
/// Checks that changing the feature vectors changes the converted audio.
///
/// A three-second sine signal is converted twice: once with every feature
/// value set to `0.1` and once with every value set to `-0.1`. Each call
/// passes the same array as both source and target features. The test passes
/// when the summed absolute difference over the overlapping part of the two
/// outputs is greater than zero.
///
/// # Errors
///
/// Propagates any error from constructing the converter or from `convert`.
#[tokio::test]
async fn test_tcc_feature_influence() -> anyhow::Result<()> {
    let mut tcc = ToneColorConverter::new("../checkpoint/tone_color_converter.onnx")?;

    let sample_rate = 16000;
    let duration_secs = 3.0;
    let num_samples = (sample_rate as f32 * duration_secs) as usize;
    let f0 = 150.0;
    let audio_data: Vec<f32> = (0..num_samples)
        .map(|n| {
            let t = n as f32 / sample_rate as f32;
            0.5 * (2.0 * PI * f0 * t).sin()
        })
        .collect();

    let features1 = [[0.1f32; 256]];
    let features2 = [[-0.1f32; 256]];

    let converted1 = tcc.convert(&audio_data, &features1, &features1).await?;
    let converted2 = tcc.convert(&audio_data, &features2, &features2).await?;

    // `zip` stops at the shorter output, so only the overlapping samples are
    // compared; the fold accumulates in order starting from 0.0.
    let diff_sum = converted1
        .0
        .iter()
        .zip(converted2.0.iter())
        .fold(0.0f32, |acc, (a, b)| acc + (a - b).abs());
    assert!(
        diff_sum > 0.0,
        "Different feature vectors should produce different conversion results"
    );
    Ok(())
}