whisper-macos-cli 0.1.2

Transcribe audio files locally on Apple Silicon via whisper.cpp with Metal GPU acceleration, exposing a strict stdin/stdout JSON contract for AI agents and Unix pipelines.
Documentation
use rubato::{FftFixedIn, Resampler};

/// Target sample rate, in Hz, that `resample_to_16khz` converts audio to.
/// Input that is already at this rate is returned without resampling.
const WHISPER_SAMPLE_RATE: u32 = 16000;

pub fn resample_to_16khz(
    samples: &[i16],
    input_rate: u32,
) -> Result<Vec<i16>, crate::error::Error> {
    if input_rate == WHISPER_SAMPLE_RATE {
        return Ok(samples.to_vec());
    }

    let samples_f64: Vec<f64> = samples.iter().map(|&s| s as f64 / 32768.0).collect();

    let chunk_size = 1024;
    let mut resampler = FftFixedIn::<f64>::new(
        input_rate as usize,
        WHISPER_SAMPLE_RATE as usize,
        chunk_size,
        1,
        1,
    )
    .map_err(|e| crate::error::Error::AudioDecode(anyhow::anyhow!("resampler init: {e}")))?;

    let mut output_f64: Vec<f64> = Vec::new();
    let mut pos = 0;

    while pos + chunk_size <= samples_f64.len() {
        let chunk = &samples_f64[pos..pos + chunk_size];
        let input = vec![chunk.to_vec()];

        let result = resampler
            .process(&input, None)
            .map_err(|e| crate::error::Error::AudioDecode(anyhow::anyhow!("resample: {e}")))?;

        if !result.is_empty() {
            output_f64.extend_from_slice(&result[0]);
        }

        pos += chunk_size;
    }

    if pos < samples_f64.len() {
        let mut last_chunk = samples_f64[pos..].to_vec();
        last_chunk.resize(chunk_size, 0.0);
        let input = vec![last_chunk];

        let result = resampler
            .process(&input, None)
            .map_err(|e| crate::error::Error::AudioDecode(anyhow::anyhow!("resample last: {e}")))?;

        if !result.is_empty() {
            let remaining = samples_f64.len() - pos;
            let expected =
                (remaining as f64 * WHISPER_SAMPLE_RATE as f64 / input_rate as f64).ceil() as usize;
            let take = expected.min(result[0].len());
            output_f64.extend_from_slice(&result[0][..take]);
        }
    }

    Ok(output_f64
        .iter()
        .map(|&s| (s.clamp(-1.0, 1.0) * 32767.0) as i16)
        .collect())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Audio already at 16 kHz must be returned sample-for-sample.
    #[test]
    fn resample_passthrough_when_already_16khz() {
        let pcm: Vec<i16> = vec![100, 200, 300, 400];
        let out = resample_to_16khz(&pcm, 16000).unwrap();
        assert_eq!(out, pcm);
    }

    /// An empty buffer at a rate that needs resampling yields an empty buffer.
    #[test]
    fn resample_handles_empty_input_at_non_target_rate() {
        let out = resample_to_16khz(&[], 44100).unwrap();
        assert!(out.is_empty());
    }

    /// Downsampling from 44.1 kHz yields output whose length is within a
    /// factor of two of the nominal 16/44 ratio.
    #[test]
    fn resample_44100_to_16000_produces_output() {
        let pcm: Vec<i16> = (0..4410).map(|n| (n % 1000) as i16).collect();
        let out = resample_to_16khz(&pcm, 44100).unwrap();
        assert!(!out.is_empty());

        let nominal = pcm.len() * 16 / 44;
        let lower = nominal / 2;
        let upper = nominal * 2;
        assert!(out.len() >= lower);
        assert!(out.len() <= upper);
    }

    /// Downsampling from 48 kHz yields some output.
    #[test]
    fn resample_48000_to_16000_produces_output() {
        let pcm: Vec<i16> = (0..4800).map(|n| ((n * 7) % 1000) as i16).collect();
        let out = resample_to_16khz(&pcm, 48000).unwrap();
        assert!(!out.is_empty());
    }

    /// Upsampling from 8 kHz roughly doubles the number of samples.
    #[test]
    fn resample_8000_to_16000_doubles_samples() {
        let pcm: Vec<i16> = (0i16..800).collect();
        let out = resample_to_16khz(&pcm, 8000).unwrap();
        assert!(out.len() >= 1500);
    }
}