// subx_cli/core/sync/dialogue/analyzer.rs
use super::segment::DialogueSegment;
use std::collections::VecDeque;

/// Energy-based speech detector that turns raw audio samples into dialogue
/// segments.
///
/// Audio is processed in consecutive frames of `hop_size` samples; the
/// energies of the most recent frames are averaged and compared against
/// `threshold` to decide whether a frame contains speech.
#[derive(Debug, Clone)]
pub struct EnergyAnalyzer {
    /// Length of the smoothing window. Together with `hop_size` it determines
    /// how many recent frame energies are averaged (`window_size / hop_size`).
    window_size: usize,
    /// Number of samples consumed per analysis frame.
    hop_size: usize,
    /// Average frame energy above which a frame is classified as speech.
    threshold: f32,
    /// Segments shorter than this many milliseconds are discarded.
    min_duration_ms: u64,
}
11
12impl EnergyAnalyzer {
13 pub fn new(threshold: f32, min_duration_ms: u64) -> Self {
15 Self {
16 window_size: 1024,
17 hop_size: 512,
18 threshold,
19 min_duration_ms,
20 }
21 }
22
23 pub fn analyze(&self, audio_data: &[f32], sample_rate: u32) -> Vec<DialogueSegment> {
25 let mut segments: Vec<DialogueSegment> = Vec::new();
26 let mut energy_buffer = VecDeque::new();
27
28 for (i, chunk) in audio_data.chunks(self.hop_size).enumerate() {
29 let energy = self.calculate_energy(chunk);
30 energy_buffer.push_back(energy);
31 if energy_buffer.len() > self.window_size / self.hop_size {
32 energy_buffer.pop_front();
33 }
34 let is_speech = self.detect_speech(&energy_buffer);
35 let timestamp = (i * self.hop_size) as f64 / sample_rate as f64;
36
37 if is_speech {
38 if let Some(last) = segments.last_mut() {
39 if last.is_speech {
40 last.end_time = timestamp;
41 } else {
42 segments.push(DialogueSegment::new_speech(timestamp, timestamp));
43 }
44 } else {
45 segments.push(DialogueSegment::new_speech(timestamp, timestamp));
46 }
47 }
48 }
49 self.filter_short_segments(segments)
50 }
51
52 fn calculate_energy(&self, chunk: &[f32]) -> f32 {
53 let sum_sq: f32 = chunk.iter().map(|&v| v * v).sum();
54 if chunk.is_empty() {
55 0.0
56 } else {
57 (sum_sq / chunk.len() as f32).sqrt()
58 }
59 }
60
61 fn detect_speech(&self, buffer: &VecDeque<f32>) -> bool {
62 if buffer.is_empty() {
63 return false;
64 }
65 let avg: f32 = buffer.iter().copied().sum::<f32>() / buffer.len() as f32;
66 avg > self.threshold
67 }
68
69 fn filter_short_segments(&self, segments: Vec<DialogueSegment>) -> Vec<DialogueSegment> {
70 segments
71 .into_iter()
72 .filter(|seg| (seg.duration() * 1000.0) as u64 >= self.min_duration_ms)
73 .collect()
74 }
75}