subx_cli/core/sync/dialogue/
detector.rs1use crate::Result;
2use crate::config::{SyncConfig, load_config};
3use crate::core::sync::dialogue::{DialogueSegment, EnergyAnalyzer};
4use crate::services::audio::AudioData;
5use std::path::Path;
6
7pub struct DialogueDetector {
9 energy_analyzer: EnergyAnalyzer,
10 config: SyncConfig,
11}
12
13impl DialogueDetector {
14 pub fn new() -> Result<Self> {
16 let config = load_config()?.sync;
17 let energy_analyzer = EnergyAnalyzer::new(
18 config.dialogue_detection_threshold,
19 config.min_dialogue_duration_ms,
20 );
21 Ok(Self {
22 energy_analyzer,
23 config,
24 })
25 }
26
27 pub async fn detect_dialogue(&self, audio_path: &Path) -> Result<Vec<DialogueSegment>> {
29 if !self.config.enable_dialogue_detection {
31 return Ok(Vec::new());
32 }
33 let audio_data = self.load_audio(audio_path).await?;
34 let segments = self
35 .energy_analyzer
36 .analyze(&audio_data.samples, audio_data.sample_rate);
37 Ok(self.optimize_segments(segments))
38 }
39
40 async fn load_audio(&self, audio_path: &Path) -> Result<AudioData> {
41 use crate::services::audio::{AudioAnalyzer, AusAdapter};
42
43 let sample_rate = if self.config.auto_detect_sample_rate {
45 let adapter = AusAdapter::new(self.config.audio_sample_rate);
46 adapter.read_audio_file(audio_path)?.sample_rate
47 } else {
48 self.config.audio_sample_rate
49 };
50 let analyzer = AudioAnalyzer::new(sample_rate);
51 analyzer.load_audio_data(audio_path).await
52 }
53
54 fn optimize_segments(&self, segments: Vec<DialogueSegment>) -> Vec<DialogueSegment> {
55 let mut optimized = Vec::new();
56 let mut current: Option<DialogueSegment> = None;
57 let gap = self.config.dialogue_merge_gap_ms as f64 / 1000.0;
58 for seg in segments {
59 if let Some(mut prev) = current.take() {
60 if prev.is_speech && seg.is_speech && seg.start_time - prev.end_time < gap {
61 prev.end_time = seg.end_time;
62 current = Some(prev);
63 } else {
64 optimized.push(prev);
65 current = Some(seg);
66 }
67 } else {
68 current = Some(seg);
69 }
70 }
71 if let Some(last) = current {
72 optimized.push(last);
73 }
74 optimized
75 }
76
77 pub fn get_speech_ratio(&self, segments: &[DialogueSegment]) -> f32 {
79 let total: f64 = segments.iter().map(|s| s.duration()).sum();
80 let speech: f64 = segments
81 .iter()
82 .filter(|s| s.is_speech)
83 .map(|s| s.duration())
84 .sum();
85 if total > 0.0 {
86 (speech / total) as f32
87 } else {
88 0.0
89 }
90 }
91}