subx_cli/services/audio/
analyzer.rs

1//! 基於 aus crate 的音訊分析器
2
3use crate::services::audio::{AudioData, AudioEnvelope};
4use crate::{Result, error::SubXError};
5use aus::{AudioFile, WindowType, analysis, operations, spectrum};
6use std::path::Path;
7
/// Audio analyzer built on top of the `aus` crate.
///
/// Holds the framing parameters shared by the envelope extraction and
/// spectral-feature routines implemented on this type.
#[derive(Debug, Clone)]
pub struct AusAudioAnalyzer {
    /// Sample rate the analyzer was configured with (passed to `new`).
    sample_rate: u32,
    /// FFT window length, in samples, used for the STFT.
    window_size: usize,
    /// Step between consecutive frames / envelope chunks, in samples.
    hop_size: usize,
}
14
15impl AusAudioAnalyzer {
16    /// 建立新的分析器,設定採樣率
17    pub fn new(sample_rate: u32) -> Self {
18        Self {
19            sample_rate,
20            window_size: 1024,
21            hop_size: 512,
22        }
23    }
24
25    /// 載入音訊檔案使用 aus
26    pub async fn load_audio_file<P: AsRef<Path>>(&self, audio_path: P) -> Result<AudioFile> {
27        let path = audio_path.as_ref();
28        let path_str = path
29            .to_str()
30            .ok_or_else(|| SubXError::audio_processing("無法轉換路徑為 UTF-8 字串"))?;
31        let mut audio_file = aus::read(path_str)?;
32        if audio_file.num_channels > 1 {
33            aus::mixdown(&mut audio_file);
34        }
35        Ok(audio_file)
36    }
37
38    /// 載入音訊檔案並轉換為 AudioData 格式
39    pub async fn load_audio_data<P: AsRef<Path>>(&self, audio_path: P) -> Result<AudioData> {
40        let audio_file = self.load_audio_file(audio_path).await?;
41        let samples: Vec<f32> = audio_file.samples[0].iter().map(|&x| x as f32).collect();
42        Ok(AudioData {
43            samples,
44            sample_rate: audio_file.sample_rate,
45            channels: audio_file.num_channels,
46            duration: audio_file.duration as f32,
47        })
48    }
49
50    /// 提取音訊能量包絡
51    pub async fn extract_envelope<P: AsRef<Path>>(&self, audio_path: P) -> Result<AudioEnvelope> {
52        let audio_file = self.load_audio_file(audio_path).await?;
53        let samples = &audio_file.samples[0];
54        let mut energy_samples = Vec::new();
55        for chunk in samples.chunks(self.hop_size) {
56            let rms_energy = operations::rms(chunk);
57            energy_samples.push(rms_energy as f32);
58        }
59        let duration = audio_file.duration as f32;
60        Ok(AudioEnvelope {
61            samples: energy_samples,
62            sample_rate: self.sample_rate,
63            duration,
64        })
65    }
66
67    /// 偵測對話段落 (相容舊介面)
68    pub fn detect_dialogue(
69        &self,
70        envelope: &AudioEnvelope,
71        threshold: f32,
72    ) -> Vec<crate::services::audio::DialogueSegment> {
73        let mut segments = Vec::new();
74        let mut in_dialogue = false;
75        let mut start = 0.0;
76        let time_per_sample = envelope.duration / envelope.samples.len() as f32;
77
78        for (i, &e) in envelope.samples.iter().enumerate() {
79            let t = i as f32 * time_per_sample;
80            if e > threshold && !in_dialogue {
81                in_dialogue = true;
82                start = t;
83            } else if e <= threshold && in_dialogue {
84                in_dialogue = false;
85                if t - start > 0.5 {
86                    segments.push(crate::services::audio::DialogueSegment {
87                        start_time: start,
88                        end_time: t,
89                        intensity: e,
90                    });
91                }
92            }
93        }
94
95        segments
96    }
97
98    /// 音訊特徵分析使用 aus
99    pub async fn analyze_audio_features(&self, audio_file: &AudioFile) -> Result<AudioFeatures> {
100        let samples = &audio_file.samples[0];
101        let stft_result = spectrum::rstft(
102            samples,
103            self.window_size,
104            self.hop_size,
105            WindowType::Hanning,
106        );
107
108        let mut features = Vec::new();
109        for frame in stft_result.iter() {
110            let (magnitude_spectrum, _) = spectrum::complex_to_polar_rfft(frame);
111            let frequencies = spectrum::rfftfreq(self.window_size, audio_file.sample_rate);
112
113            let spectral_centroid = analysis::spectral_centroid(&magnitude_spectrum, &frequencies);
114            let spectral_entropy = analysis::spectral_entropy(&magnitude_spectrum);
115            let zero_crossing_rate = analysis::zero_crossing_rate(samples, audio_file.sample_rate);
116
117            features.push(FrameFeatures {
118                spectral_centroid: spectral_centroid as f32,
119                spectral_entropy: spectral_entropy as f32,
120                zero_crossing_rate: zero_crossing_rate as f32,
121            });
122        }
123
124        Ok(AudioFeatures { frames: features })
125    }
126}
127
128/// 音訊特徵資料結構
129#[derive(Debug, Clone)]
130pub struct AudioFeatures {
131    pub frames: Vec<FrameFeatures>,
132}
133
/// Features describing a single analysis frame.
#[derive(Clone, Debug)]
pub struct FrameFeatures {
    /// Spectral centroid of the frame's magnitude spectrum.
    pub spectral_centroid: f32,
    /// Spectral entropy of the frame's magnitude spectrum.
    pub spectral_entropy: f32,
    /// Zero-crossing rate associated with the frame.
    pub zero_crossing_rate: f32,
}
139}