use ferrum_types::{FerrumError, Result};
use std::path::Path;
/// Maximum number of samples in one chunk returned by `chunk_pcm`:
/// 16 000 × 30, i.e. 30 seconds of audio at a 16 kHz sample rate.
pub const CHUNK_SAMPLES: usize = 16000 * 30;
/// Loads the audio file at `path` and returns its decoded samples.
///
/// A path whose extension equals `wav` (compared after lowercasing) is read
/// directly with `load_wav_file`; any other path, including one without an
/// extension, is handed to `convert_with_ffmpeg`.
///
/// # Errors
///
/// Returns the error reported by whichever loader was selected.
pub fn load_audio(path: &str) -> Result<Vec<f32>> {
    let extension = Path::new(path).extension().and_then(|raw| raw.to_str());

    match extension {
        Some(name) if name.to_lowercase() == "wav" => load_wav_file(path),
        _ => convert_with_ffmpeg(path),
    }
}
/// Loads the audio file at `path` and returns its samples at `target_rate`.
///
/// A path whose extension equals `wav` (compared after lowercasing) is read
/// with `load_wav_file_at_rate`; any other path, including one without an
/// extension, is handed to `convert_with_ffmpeg_at_rate`.
///
/// # Errors
///
/// Returns the error reported by whichever loader was selected.
pub fn load_audio_at_rate(path: &str, target_rate: u32) -> Result<Vec<f32>> {
    let extension = Path::new(path).extension().and_then(|raw| raw.to_str());

    match extension {
        Some(name) if name.to_lowercase() == "wav" => load_wav_file_at_rate(path, target_rate),
        _ => convert_with_ffmpeg_at_rate(path, target_rate),
    }
}
/// Decodes an in-memory audio buffer into samples.
///
/// The bytes are first parsed as WAV via `load_wav_bytes`. If that fails for
/// any reason, the bytes are written to a temporary file and converted with
/// `convert_with_ffmpeg`; the temporary file is removed afterwards whether or
/// not the conversion succeeded.
///
/// # Errors
///
/// Returns an error when the temporary path is not valid UTF-8, when the
/// temporary file cannot be written, or when the ffmpeg conversion fails.
pub fn load_audio_bytes(data: &[u8]) -> Result<Vec<f32>> {
    // Per-process counter so that concurrent calls never share a temp file.
    static NEXT_TMP_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    if let Ok(pcm) = load_wav_bytes(data) {
        return Ok(pcm);
    }

    // Process id + counter keeps the name unique across processes and threads;
    // a single fixed name let parallel calls overwrite or delete each other's input.
    let id = NEXT_TMP_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let tmp = std::env::temp_dir().join(format!(
        "ferrum_audio_tmp_{}_{id}",
        std::process::id()
    ));

    // Fail loudly instead of silently handing ffmpeg an empty path.
    let tmp_str = tmp.to_str().ok_or_else(|| {
        FerrumError::model(format!(
            "temp audio path is not valid UTF-8: {}",
            tmp.display()
        ))
    })?;

    if let Err(e) = std::fs::write(&tmp, data) {
        // A failed write may still have created a partial file.
        let _ = std::fs::remove_file(&tmp);
        return Err(FerrumError::model(format!("write temp audio: {e}")));
    }

    let result = convert_with_ffmpeg(tmp_str);
    // Best-effort cleanup; the conversion result is what matters to the caller.
    let _ = std::fs::remove_file(&tmp);
    result
}
/// Splits `pcm` into consecutive borrowed slices of at most `CHUNK_SAMPLES`
/// samples each.
///
/// Input that already fits in one chunk (including empty input) is returned
/// as a single-element vector holding the whole slice; longer input is cut
/// into full chunks followed by one shorter remainder chunk, if any.
pub fn chunk_pcm(pcm: &[f32]) -> Vec<&[f32]> {
    match pcm.len() {
        total if total > CHUNK_SAMPLES => pcm.chunks(CHUNK_SAMPLES).collect(),
        _ => vec![pcm],
    }
}
/// Opens the WAV file at `path` and decodes it with `decode_wav`.
///
/// # Errors
///
/// Returns an `open audio {path}: …` error when the file cannot be opened as
/// WAV, or whatever error `decode_wav` reports.
fn load_wav_file(path: &str) -> Result<Vec<f32>> {
    match hound::WavReader::open(path) {
        Ok(wav) => decode_wav(wav),
        Err(e) => Err(FerrumError::model(format!("open audio {path}: {e}"))),
    }
}
/// Parses `data` as an in-memory WAV stream and decodes it with `decode_wav`.
///
/// # Errors
///
/// Returns a `decode: …` error when the bytes do not form a readable WAV
/// header, or whatever error `decode_wav` reports.
fn load_wav_bytes(data: &[u8]) -> Result<Vec<f32>> {
    match hound::WavReader::new(std::io::Cursor::new(data)) {
        Ok(wav) => decode_wav(wav),
        Err(e) => Err(FerrumError::model(format!("decode: {e}"))),
    }
}
/// Decodes every sample from `reader` into mono `f32` audio at 16 kHz.
///
/// Steps:
/// 1. Float samples are taken as-is; integer samples are divided by
///    `2^(bits_per_sample - 1)`.
/// 2. Multi-channel audio is down-mixed by averaging the samples of each
///    complete frame.
/// 3. Unless the source rate is within 1 Hz of 16 000, the signal is passed
///    through `resample`.
///
/// Decoding stops at the first sample that fails to read, so a truncated file
/// yields the audio that precedes the failure.
fn decode_wav<R: std::io::Read>(reader: hound::WavReader<R>) -> Result<Vec<f32>> {
    let spec = reader.spec();
    let sample_rate = spec.sample_rate as f64;
    let channels = spec.channels as usize;

    // `map_while` stops at the first failed read. Skipping failures instead
    // would keep polling every remaining sample of a truncated data chunk and
    // could shift the channel interleaving of whatever follows.
    let samples: Vec<f32> = match spec.sample_format {
        hound::SampleFormat::Float => reader
            .into_samples::<f32>()
            .map_while(|s| s.ok())
            .collect(),
        hound::SampleFormat::Int => {
            let bits = spec.bits_per_sample;
            let max_val = (1u32 << (bits - 1)) as f32;
            reader
                .into_samples::<i32>()
                .map_while(|s| s.ok())
                .map(|s| s as f32 / max_val)
                .collect()
        }
    };

    // `<= 1` also keeps a zero channel count away from `chunks_exact`, which
    // panics on a chunk size of 0.
    let mono: Vec<f32> = if channels <= 1 {
        samples
    } else {
        // `chunks_exact` drops a trailing partial frame rather than averaging
        // it with the full channel count, which would attenuate the last sample.
        samples
            .chunks_exact(channels)
            .map(|frame| frame.iter().sum::<f32>() / channels as f32)
            .collect()
    };

    if (sample_rate - 16000.0).abs() < 1.0 {
        Ok(mono)
    } else {
        Ok(resample(&mono, sample_rate, 16000.0))
    }
}
/// Converts `input_path` to 16 kHz mono 16-bit WAV with the `ffmpeg`
/// executable and loads the converted file with `load_wav_file`.
///
/// ffmpeg writes to a per-call temporary file (named from the process id and
/// a counter) that is removed before returning, on success and on failure.
///
/// # Errors
///
/// Returns an error when `ffmpeg` cannot be found or started, when it exits
/// with a non-zero status, or when the converted file cannot be loaded.
fn convert_with_ffmpeg(input_path: &str) -> Result<Vec<f32>> {
    // Per-process counter so that concurrent conversions never share an output file.
    static NEXT_OUT_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let id = NEXT_OUT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let output = std::env::temp_dir().join(format!(
        "ferrum_ffmpeg_out_{}_{id}.wav",
        std::process::id()
    ));
    let output_str = output.to_string_lossy().into_owned();

    let status = std::process::Command::new("ffmpeg")
        .args([
            "-y",
            "-i",
            input_path,
            "-ar",
            "16000",
            "-ac",
            "1",
            "-sample_fmt",
            "s16",
            "-f",
            "wav",
            output_str.as_str(),
        ])
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status();

    let result = match status {
        Ok(s) if s.success() => load_wav_file(&output_str),
        Ok(s) => Err(FerrumError::model(format!(
            "ffmpeg exited with code {}. Is the audio file valid?",
            s.code().unwrap_or(-1)
        ))),
        // Only a missing executable deserves the install hint.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Err(FerrumError::model(
            "ffmpeg not found. Install ffmpeg to process non-WAV audio (brew install ffmpeg)",
        )),
        Err(e) => Err(FerrumError::model(format!("failed to run ffmpeg: {e}"))),
    };

    // Best-effort cleanup on every path; a failed ffmpeg run can leave a
    // partial output file behind.
    let _ = std::fs::remove_file(&output);
    result
}
/// Opens the WAV file at `path` and decodes it with `decode_wav_at_rate`,
/// requesting `target_rate` as the output sample rate.
///
/// # Errors
///
/// Returns an `open audio {path}: …` error when the file cannot be opened as
/// WAV, or whatever error `decode_wav_at_rate` reports.
fn load_wav_file_at_rate(path: &str, target_rate: u32) -> Result<Vec<f32>> {
    match hound::WavReader::open(path) {
        Ok(wav) => decode_wav_at_rate(wav, target_rate),
        Err(e) => Err(FerrumError::model(format!("open audio {path}: {e}"))),
    }
}
/// Decodes every sample from `reader` into mono `f32` audio at `target_rate`.
///
/// Steps:
/// 1. Float samples are taken as-is; integer samples are divided by
///    `2^(bits_per_sample - 1)`.
/// 2. Multi-channel audio is down-mixed by averaging the samples of each
///    complete frame.
/// 3. Unless the source rate is within 1 Hz of `target_rate`, the signal is
///    passed through `resample`.
///
/// Decoding stops at the first sample that fails to read, so a truncated file
/// yields the audio that precedes the failure.
fn decode_wav_at_rate<R: std::io::Read>(
    reader: hound::WavReader<R>,
    target_rate: u32,
) -> Result<Vec<f32>> {
    let spec = reader.spec();
    let sample_rate = spec.sample_rate as f64;
    let channels = spec.channels as usize;

    // `map_while` stops at the first failed read. Skipping failures instead
    // would keep polling every remaining sample of a truncated data chunk and
    // could shift the channel interleaving of whatever follows.
    let samples: Vec<f32> = match spec.sample_format {
        hound::SampleFormat::Float => reader
            .into_samples::<f32>()
            .map_while(|s| s.ok())
            .collect(),
        hound::SampleFormat::Int => {
            let bits = spec.bits_per_sample;
            let max_val = (1u32 << (bits - 1)) as f32;
            reader
                .into_samples::<i32>()
                .map_while(|s| s.ok())
                .map(|s| s as f32 / max_val)
                .collect()
        }
    };

    // `<= 1` also keeps a zero channel count away from `chunks_exact`, which
    // panics on a chunk size of 0.
    let mono: Vec<f32> = if channels <= 1 {
        samples
    } else {
        // `chunks_exact` drops a trailing partial frame rather than averaging
        // it with the full channel count, which would attenuate the last sample.
        samples
            .chunks_exact(channels)
            .map(|frame| frame.iter().sum::<f32>() / channels as f32)
            .collect()
    };

    let target = target_rate as f64;
    if (sample_rate - target).abs() < 1.0 {
        Ok(mono)
    } else {
        Ok(resample(&mono, sample_rate, target))
    }
}
/// Converts `input_path` to mono 16-bit WAV at `target_rate` with the
/// `ffmpeg` executable and loads the converted file with
/// `load_wav_file_at_rate`.
///
/// ffmpeg writes to a per-call temporary file (named from the process id and
/// a counter) that is removed before returning, on success and on failure.
///
/// # Errors
///
/// Returns an error when `ffmpeg` cannot be found or started, when it exits
/// with a non-zero status, or when the converted file cannot be loaded.
fn convert_with_ffmpeg_at_rate(input_path: &str, target_rate: u32) -> Result<Vec<f32>> {
    // Per-process counter so that concurrent conversions never share an output file.
    static NEXT_OUT_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let id = NEXT_OUT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let output = std::env::temp_dir().join(format!(
        "ferrum_ffmpeg_out_rate_{}_{id}.wav",
        std::process::id()
    ));
    let output_str = output.to_string_lossy().into_owned();
    let rate_str = target_rate.to_string();

    let status = std::process::Command::new("ffmpeg")
        .args([
            "-y",
            "-i",
            input_path,
            "-ar",
            rate_str.as_str(),
            "-ac",
            "1",
            "-sample_fmt",
            "s16",
            "-f",
            "wav",
            output_str.as_str(),
        ])
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status();

    let result = match status {
        Ok(s) if s.success() => load_wav_file_at_rate(&output_str, target_rate),
        Ok(s) => Err(FerrumError::model(format!(
            "ffmpeg exited with code {}. Is the audio file valid?",
            s.code().unwrap_or(-1)
        ))),
        // Only a missing executable deserves the install hint.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Err(FerrumError::model(
            "ffmpeg not found. Install ffmpeg to process non-WAV audio (brew install ffmpeg)",
        )),
        Err(e) => Err(FerrumError::model(format!("failed to run ffmpeg: {e}"))),
    };

    // Best-effort cleanup on every path; a failed ffmpeg run can leave a
    // partial output file behind.
    let _ = std::fs::remove_file(&output);
    result
}
pub(crate) fn resample(input: &[f32], from_rate: f64, to_rate: f64) -> Vec<f32> {
use rubato::{
audioadapter::Adapter, Async, FixedAsync, Resampler as RubatoResampler,
SincInterpolationParameters, SincInterpolationType, WindowFunction,
};
let ratio = to_rate / from_rate;
let chunk_size = 1024;
let params = SincInterpolationParameters {
sinc_len: 128,
f_cutoff: 0.95,
interpolation: SincInterpolationType::Linear,
oversampling_factor: 128,
window: WindowFunction::BlackmanHarris2,
};
let mut resampler =
Async::<f32>::new_sinc(ratio, 1.0, ¶ms, chunk_size, 1, FixedAsync::Input)
.expect("resample init");
let mut output = Vec::new();
let mut pos = 0;
while pos < input.len() {
let end = (pos + chunk_size).min(input.len());
let chunk = &input[pos..end];
let data: Vec<f32> = if chunk.len() < chunk_size {
let mut p = chunk.to_vec();
p.resize(chunk_size, 0.0);
p
} else {
chunk.to_vec()
};
let input_vecs = vec![data];
let input_adapter =
audioadapter_buffers::direct::SequentialSliceOfVecs::new(&input_vecs, 1, chunk_size)
.expect("input adapter");
let result = resampler
.process(&input_adapter, 0, None)
.expect("resample");
let frames = result.frames();
for i in 0..frames {
output.push(result.read_sample(0, i).unwrap_or(0.0));
}
pos += chunk_size;
}
output
}