subx_cli/core/sync/dialogue/
analyzer.rs

1use super::segment::DialogueSegment;
2use std::collections::VecDeque;
3
/// Audio energy analyzer used for voice activity detection.
///
/// The analyzer splits the input samples into hops, computes the RMS energy of
/// each hop, smooths it over a short sliding window and compares the result
/// against a fixed threshold to decide whether a hop contains speech.
#[derive(Debug, Clone)]
pub struct EnergyAnalyzer {
    /// Length of the smoothing window, in samples. Together with `hop_size`
    /// it determines how many hop energies are averaged.
    window_size: usize,
    /// Number of samples per analysis hop (the chunk length used when
    /// iterating over the audio data).
    hop_size: usize,
    /// Smoothed energy must be strictly greater than this value for a hop to
    /// be classified as speech.
    threshold: f32,
    /// Segments whose duration is shorter than this many milliseconds are
    /// discarded from the analysis result.
    min_duration_ms: u64,
}
11
12impl EnergyAnalyzer {
13    /// 建立分析器,設定能量閾值與最短語音持續時間
14    pub fn new(threshold: f32, min_duration_ms: u64) -> Self {
15        Self {
16            window_size: 1024,
17            hop_size: 512,
18            threshold,
19            min_duration_ms,
20        }
21    }
22
23    /// 分析音訊樣本,回傳對話片段列表
24    pub fn analyze(&self, audio_data: &[f32], sample_rate: u32) -> Vec<DialogueSegment> {
25        let mut segments: Vec<DialogueSegment> = Vec::new();
26        let mut energy_buffer = VecDeque::new();
27
28        for (i, chunk) in audio_data.chunks(self.hop_size).enumerate() {
29            let energy = self.calculate_energy(chunk);
30            energy_buffer.push_back(energy);
31            if energy_buffer.len() > self.window_size / self.hop_size {
32                energy_buffer.pop_front();
33            }
34            let is_speech = self.detect_speech(&energy_buffer);
35            let timestamp = (i * self.hop_size) as f64 / sample_rate as f64;
36
37            if is_speech {
38                if let Some(last) = segments.last_mut() {
39                    if last.is_speech {
40                        last.end_time = timestamp;
41                    } else {
42                        segments.push(DialogueSegment::new_speech(timestamp, timestamp));
43                    }
44                } else {
45                    segments.push(DialogueSegment::new_speech(timestamp, timestamp));
46                }
47            }
48        }
49        self.filter_short_segments(segments)
50    }
51
52    fn calculate_energy(&self, chunk: &[f32]) -> f32 {
53        let sum_sq: f32 = chunk.iter().map(|&v| v * v).sum();
54        if chunk.is_empty() {
55            0.0
56        } else {
57            (sum_sq / chunk.len() as f32).sqrt()
58        }
59    }
60
61    fn detect_speech(&self, buffer: &VecDeque<f32>) -> bool {
62        if buffer.is_empty() {
63            return false;
64        }
65        let avg: f32 = buffer.iter().copied().sum::<f32>() / buffer.len() as f32;
66        avg > self.threshold
67    }
68
69    fn filter_short_segments(&self, segments: Vec<DialogueSegment>) -> Vec<DialogueSegment> {
70        segments
71            .into_iter()
72            .filter(|seg| (seg.duration() * 1000.0) as u64 >= self.min_duration_ms)
73            .collect()
74    }
75}