// subx-cli 1.7.4 - Docs.rs

use crate::Result;
use crate::services::vad::audio_loader::DirectAudioLoader;
use crate::services::vad::detector::AudioInfo;
use std::path::Path;

/// Audio processor for VAD operations.
///
/// Loads audio files for voice activity detection processing. The audio
/// keeps its original sample rate and only the first channel is retained;
/// no resampling or channel mixing is performed.
pub struct VadAudioProcessor {}

/// Processed audio data ready for VAD analysis.
///
/// Bundles the decoded audio samples with the metadata describing them,
/// as returned by `VadAudioProcessor::load_and_prepare_audio_direct`.
#[derive(Debug, Clone)]
pub struct ProcessedAudioData {
    /// Audio samples as 16-bit signed integers (a single channel when
    /// produced by `VadAudioProcessor`).
    pub samples: Vec<i16>,
    /// Metadata describing `samples`: sample rate, channel count,
    /// duration in seconds, and total sample count.
    pub info: AudioInfo,
}

impl VadAudioProcessor {
    /// Create a new VAD audio processor.
    ///
    /// The processor takes no configuration and holds no state.
    ///
    /// # Returns
    ///
    /// A new `VadAudioProcessor` instance.
    ///
    /// # Errors
    ///
    /// This implementation always returns `Ok`.
    pub fn new() -> Result<Self> {
        Ok(Self {})
    }

    /// Load and prepare an audio file for VAD processing.
    ///
    /// The file is decoded with `DirectAudioLoader` on a blocking task, then
    /// reduced to its first channel. The original sample rate is retained;
    /// no resampling is performed.
    ///
    /// # Arguments
    ///
    /// * `audio_path` - Path to the audio file to process
    ///
    /// # Returns
    ///
    /// Single-channel audio data ready for VAD analysis. If loading fails and
    /// the file on disk is zero bytes long, an empty result (no samples,
    /// 16 kHz, one channel, zero duration) is returned instead of an error.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The loader cannot be created or fails to load the samples, and the
    ///   file is not zero bytes long
    /// - The blocking task fails to complete (reported as an audio
    ///   processing error)
    pub async fn load_and_prepare_audio_direct(
        &self,
        audio_path: &Path,
    ) -> Result<ProcessedAudioData> {
        // 1. Load with DirectAudioLoader in a blocking task to avoid stalling
        //    the async runtime during synchronous decoding / filesystem access.
        let audio_path_buf = audio_path.to_path_buf();
        let load_result =
            tokio::task::spawn_blocking(move || -> Result<Option<(Vec<i16>, AudioInfo)>> {
                let loader = DirectAudioLoader::new()?;
                // Defense-in-depth fallback limit; matches the default value
                // of `general.max_audio_bytes`. Production callers invoking
                // `DirectAudioLoader::load_audio_samples` directly should pass
                // the configured value from `GeneralConfig`.
                const DEFAULT_MAX_AUDIO_BYTES: u64 = 2_147_483_648;
                match loader.load_audio_samples(&audio_path_buf, DEFAULT_MAX_AUDIO_BYTES) {
                    Ok((samples, info)) => Ok(Some((samples, info))),
                    // A zero-byte file is not an error: signal "no samples"
                    // with `None`. Any other failure (including being unable
                    // to stat the file) propagates the loader's error.
                    Err(e) => match std::fs::metadata(&audio_path_buf) {
                        Ok(metadata) if metadata.len() == 0 => Ok(None),
                        _ => Err(e),
                    },
                }
            })
            .await
            // First `?` surfaces a join failure, second `?` the loader's own error.
            .map_err(|e| crate::error::SubXError::audio_processing(e.to_string()))??;

        let (samples, info) = match load_result {
            Some(v) => v,
            None => {
                return Ok(ProcessedAudioData {
                    samples: vec![],
                    info: AudioInfo {
                        sample_rate: 16000, // Default value
                        channels: 1,
                        duration_seconds: 0.0,
                        total_samples: 0,
                    },
                });
            }
        };

        // 2. Extract first channel if multi-channel, retain original sample rate.
        //    A reported channel count of 0 is treated like mono so that the
        //    samples are passed through instead of panicking in `step_by(0)`.
        let channel_count = info.channels as usize;
        let mono_samples = if channel_count <= 1 {
            samples
        } else {
            self.extract_first_channel(&samples, channel_count)
        };
        let mono_info = AudioInfo {
            sample_rate: info.sample_rate,
            channels: 1,
            duration_seconds: info.duration_seconds,
            total_samples: mono_samples.len(),
        };
        Ok(ProcessedAudioData {
            samples: mono_samples,
            info: mono_info,
        })
    }

    /// Extract the first channel samples from interleaved multi-channel data.
    ///
    /// Takes every `channels`-th sample starting at index 0. A `channels`
    /// value of 0 is treated as 1 (all samples are returned), because
    /// `Iterator::step_by` panics on a zero step.
    fn extract_first_channel(&self, samples: &[i16], channels: usize) -> Vec<i16> {
        samples.iter().step_by(channels.max(1)).copied().collect()
    }
}