subx_cli/core/sync/dialogue/
detector.rs

1use crate::Result;
2use crate::config::{SyncConfig, load_config};
3use crate::core::sync::dialogue::{DialogueSegment, EnergyAnalyzer};
4use crate::services::audio::AudioData;
5use std::path::Path;
6
7/// 主對話檢測器,整合能量分析與配置
8pub struct DialogueDetector {
9    energy_analyzer: EnergyAnalyzer,
10    config: SyncConfig,
11}
12
13impl DialogueDetector {
14    /// 建立對話檢測器,從配置讀取參數
15    pub fn new() -> Result<Self> {
16        let config = load_config()?.sync;
17        let energy_analyzer = EnergyAnalyzer::new(
18            config.dialogue_detection_threshold,
19            config.min_dialogue_duration_ms,
20        );
21        Ok(Self {
22            energy_analyzer,
23            config,
24        })
25    }
26
27    /// 執行對話檢測,回傳語音活動片段清單
28    pub async fn detect_dialogue(&self, audio_path: &Path) -> Result<Vec<DialogueSegment>> {
29        // 若未啟用,直接回傳空列表
30        if !self.config.enable_dialogue_detection {
31            return Ok(Vec::new());
32        }
33        let audio_data = self.load_audio(audio_path).await?;
34        let segments = self
35            .energy_analyzer
36            .analyze(&audio_data.samples, audio_data.sample_rate);
37        Ok(self.optimize_segments(segments))
38    }
39
40    async fn load_audio(&self, audio_path: &Path) -> Result<AudioData> {
41        use crate::services::audio::{AudioAnalyzer, AusAdapter};
42
43        // 根據配置決定是否自動檢測採樣率
44        let sample_rate = if self.config.auto_detect_sample_rate {
45            let adapter = AusAdapter::new(self.config.audio_sample_rate);
46            adapter.read_audio_file(audio_path)?.sample_rate
47        } else {
48            self.config.audio_sample_rate
49        };
50        let analyzer = AudioAnalyzer::new(sample_rate);
51        analyzer.load_audio_data(audio_path).await
52    }
53
54    fn optimize_segments(&self, segments: Vec<DialogueSegment>) -> Vec<DialogueSegment> {
55        let mut optimized = Vec::new();
56        let mut current: Option<DialogueSegment> = None;
57        let gap = self.config.dialogue_merge_gap_ms as f64 / 1000.0;
58        for seg in segments {
59            if let Some(mut prev) = current.take() {
60                if prev.is_speech && seg.is_speech && seg.start_time - prev.end_time < gap {
61                    prev.end_time = seg.end_time;
62                    current = Some(prev);
63                } else {
64                    optimized.push(prev);
65                    current = Some(seg);
66                }
67            } else {
68                current = Some(seg);
69            }
70        }
71        if let Some(last) = current {
72            optimized.push(last);
73        }
74        optimized
75    }
76
77    /// 計算語音佔比,以評估語音活動程度
78    pub fn get_speech_ratio(&self, segments: &[DialogueSegment]) -> f32 {
79        let total: f64 = segments.iter().map(|s| s.duration()).sum();
80        let speech: f64 = segments
81            .iter()
82            .filter(|s| s.is_speech)
83            .map(|s| s.duration())
84            .sum();
85        if total > 0.0 {
86            (speech / total) as f32
87        } else {
88            0.0
89        }
90    }
91}