use {
    super::get_session_builder,
    crate::{OperationError, resample},
    ndarray::{Array2, ErrorKind, ShapeError},
    ort::{inputs, session::RunOptions, session::Session, value::TensorRef},
    std::{
        path::Path,
        time::{Duration, Instant, SystemTime},
    },
};
/// Wrapper around an ONNX Runtime [`Session`] that is driven by
/// [`ToneColorConverter::convert`].
///
/// The session is fed a source audio buffer plus source and target
/// 256-value embeddings and yields an `"audio"` output tensor.
/// NOTE(review): judging by the type name this is a tone-color (voice timbre)
/// conversion model — confirm against the model that is actually loaded.
pub struct ToneColorConverter {
/// Inference session created in [`ToneColorConverter::new`].
model: Session,
}
impl ToneColorConverter {
pub fn new<P>(model_path: P) -> Result<Self, OperationError>
where
P: AsRef<Path>,
{
let model = get_session_builder()?.commit_from_file(model_path)?;
Ok(Self { model })
}
pub async fn convert(
&mut self,
src_audio: &[f32],
src_se: &[[f32; 256]],
tgt_se: &[[f32; 256]],
) -> Result<(Vec<f32>, usize, Duration), OperationError> {
let max_channels = tgt_se.len().max(src_se.len());
let audio = if src_se.len() < max_channels {
resample::<22050, 22050, f32>(src_audio, src_se.len(), max_channels)?
} else {
src_audio.to_vec()
};
let audio = Array2::from_shape_vec((audio.len() / max_channels, max_channels), audio)?;
let src_se = Array2::from(src_se.to_vec());
let tgt_se = Array2::from(tgt_se.to_vec());
let options = RunOptions::new()?;
let start = SystemTime::now();
let outputs = self
.model
.run_async(
inputs![
"src_audio" => TensorRef::from_array_view(&audio)?,
"src_se" => TensorRef::from_array_view(&src_se)?,
"tgt_se" => TensorRef::from_array_view(&tgt_se)?,
],
&options,
)?
.await?;
let (_, audio) = outputs["audio"].try_extract_tensor::<f32>()?;
Ok((audio.to_vec(), max_channels, start.elapsed()?))
}
}