native-whisperx 0.1.4

WhisperX-style transcription workflows composed from moritzbrantner Rust building-block crates.
use std::time::Instant;

#[cfg(feature = "diarization")]
use audio_analysis_transcription::TranscriptDiarizationProvider;
use audio_analysis_transcription::{
    transcribe, CandleWhisperTranscriber, EnergyVadTranscriptionProvider,
    ReusableCandleWhisperTranscriber, TranscriptionPipelineRequest, TranscriptionPipelineResponse,
    TranscriptionProviderSelection,
};

use crate::config::{
    AsrProvider, NativeWhisperxConfig, NativeWhisperxError, NativeWhisperxReport, VadMethod,
};
use crate::config_mapping::{
    build_transcription_request, run_native_with_optional_alignment, run_native_with_selected_vad,
};
use crate::output::write_outputs_with_options;
use crate::report::{append_native_alignment_diagnostics, append_native_diarization_diagnostics};

pub fn run(config: NativeWhisperxConfig) -> Result<NativeWhisperxReport, NativeWhisperxError> {
    let run_started = Instant::now();
    let request = build_transcription_request(&config)?;
    let mut response = if config.asr.provider == AsrProvider::Native && config.translation.enabled {
        crate::run_native_with_translation(request, &config)?
    } else if config.asr.provider == AsrProvider::Native
        && matches!(config.vad.method, VadMethod::Silero | VadMethod::Pyannote)
    {
        run_native_with_selected_vad(request, &config)?
    } else {
        run_with_phase_observer(request, &config)?
    };
    append_native_alignment_diagnostics(&mut response, &config);
    append_native_diarization_diagnostics(&mut response, &config);
    crate::save_draft_speakers_from_response(&mut response, &config)?;
    let output_started = Instant::now();
    let output_files = write_outputs_with_options(
        &response,
        &config.output,
        config.alignment.return_char_alignments,
    )?;
    response.diagnostics.push(format!(
        "phaseOutputSeconds={:.6}",
        output_started.elapsed().as_secs_f64()
    ));
    response.diagnostics.push(format!(
        "phaseNativeTotalSeconds={:.6}",
        run_started.elapsed().as_secs_f64()
    ));
    Ok(NativeWhisperxReport {
        response,
        output_files,
    })
}

pub fn run_many(
    configs: Vec<NativeWhisperxConfig>,
) -> Result<Vec<NativeWhisperxReport>, NativeWhisperxError> {
    if should_reuse_native_asr_provider(&configs) {
        return run_many_reusing_native_provider(configs);
    }
    configs.into_iter().map(run).collect()
}

pub fn run_many_reusing_native_provider(
    configs: Vec<NativeWhisperxConfig>,
) -> Result<Vec<NativeWhisperxReport>, NativeWhisperxError> {
    let mut reports = Vec::with_capacity(configs.len());
    let mut reusable_asr: Option<ReusableCandleWhisperTranscriber> = None;

    for config in configs {
        let run_started = Instant::now();
        let request = build_transcription_request(&config)?;
        let TranscriptionProviderSelection::CandleWhisper(options) = &request.provider else {
            return Err(NativeWhisperxError::InvalidConfig(
                "native multi-input reuse requires the Candle Whisper native provider".to_string(),
            ));
        };

        let reused_provider = reusable_asr
            .as_ref()
            .is_some_and(|provider| provider.options == *options);
        if !reused_provider {
            reusable_asr = Some(ReusableCandleWhisperTranscriber::new(options.clone()));
        }
        let asr_provider = reusable_asr
            .as_mut()
            .expect("native ASR provider should be initialized");
        let mut vad = EnergyVadTranscriptionProvider;
        let mut response = run_with_reusable_asr(request, &config, &mut vad, asr_provider)?;
        response.diagnostics.push(if reused_provider {
            "nativeMultiInputAsrProvider=reused".to_string()
        } else {
            "nativeMultiInputAsrProvider=loaded".to_string()
        });
        append_native_alignment_diagnostics(&mut response, &config);
        append_native_diarization_diagnostics(&mut response, &config);
        crate::save_draft_speakers_from_response(&mut response, &config)?;
        let output_started = Instant::now();
        let output_files = write_outputs_with_options(
            &response,
            &config.output,
            config.alignment.return_char_alignments,
        )?;
        response.diagnostics.push(format!(
            "phaseOutputSeconds={:.6}",
            output_started.elapsed().as_secs_f64()
        ));
        response.diagnostics.push(format!(
            "phaseNativeTotalSeconds={:.6}",
            run_started.elapsed().as_secs_f64()
        ));
        reports.push(NativeWhisperxReport {
            response,
            output_files,
        });
    }

    Ok(reports)
}

fn should_reuse_native_asr_provider(configs: &[NativeWhisperxConfig]) -> bool {
    configs.len() > 1
        && configs.iter().all(|config| {
            config.asr.provider == AsrProvider::Native
                && !config.translation.enabled
                && matches!(config.vad.method, VadMethod::Energy)
        })
}

fn run_with_reusable_asr(
    request: TranscriptionPipelineRequest,
    config: &NativeWhisperxConfig,
    vad_provider: &mut EnergyVadTranscriptionProvider,
    asr_provider: &mut ReusableCandleWhisperTranscriber,
) -> Result<TranscriptionPipelineResponse, NativeWhisperxError> {
    #[cfg(feature = "diarization")]
    {
        let mut diarizer = crate::native_diarization_provider(config)?;
        let diarization_provider = request
            .diarization
            .enabled
            .then_some(&mut diarizer as &mut dyn TranscriptDiarizationProvider);
        return run_native_with_optional_alignment(
            request,
            vad_provider,
            asr_provider,
            diarization_provider,
        );
    }

    #[cfg(not(feature = "diarization"))]
    {
        let _ = config;
        run_native_with_optional_alignment(request, vad_provider, asr_provider, None)
    }
}

pub(crate) fn run_with_phase_observer(
    request: TranscriptionPipelineRequest,
    config: &NativeWhisperxConfig,
) -> Result<TranscriptionPipelineResponse, NativeWhisperxError> {
    if config.asr.provider != AsrProvider::Native {
        return transcribe(request)
            .map_err(|error| NativeWhisperxError::Transcription(error.to_string()));
    }

    let TranscriptionProviderSelection::CandleWhisper(options) = &request.provider else {
        return transcribe(request)
            .map_err(|error| NativeWhisperxError::Transcription(error.to_string()));
    };
    let mut vad = EnergyVadTranscriptionProvider;
    let mut asr_provider = CandleWhisperTranscriber::new(options.clone());

    #[cfg(feature = "diarization")]
    {
        let mut diarizer = crate::native_diarization_provider(config)?;
        let diarization_provider = request
            .diarization
            .enabled
            .then_some(&mut diarizer as &mut dyn TranscriptDiarizationProvider);
        run_native_with_optional_alignment(
            request,
            &mut vad,
            &mut asr_provider,
            diarization_provider,
        )
    }

    #[cfg(not(feature = "diarization"))]
    {
        run_native_with_optional_alignment(request, &mut vad, &mut asr_provider, None)
    }
}