subx_cli/core/sync/
engine.rs

1use crate::Result;
2use crate::core::formats::Subtitle;
3use crate::services::audio::{AudioAnalyzer, AudioEnvelope};
4use std::path::Path;
5
/// Subtitle synchronization engine.
///
/// Combines an [`AudioAnalyzer`] with a [`SyncConfig`] to estimate the time
/// offset between a subtitle track and the audio of a media file.
pub struct SyncEngine {
    // Extracts the audio envelope and detects dialogue for the media file.
    audio_analyzer: AudioAnalyzer,
    // Limits and thresholds applied while synchronizing.
    config: SyncConfig,
}
11
/// Synchronization settings.
#[derive(Debug, Clone)]
pub struct SyncConfig {
    /// Largest offset, in seconds, searched in either direction during
    /// cross-correlation.
    pub max_offset_seconds: f32,
    /// Correlation value the best match must exceed before a non-zero
    /// confidence is reported.
    pub correlation_threshold: f32,
    /// Threshold forwarded to the audio analyzer's dialogue detection.
    pub dialogue_threshold: f32,
    /// Minimum dialogue length.
    ///
    /// NOTE(review): the unit and the consumer of this value are not visible
    /// here; presumably seconds. Confirm against the code that reads it.
    pub min_dialogue_length: f32,
}
20
/// Outcome of a synchronization attempt.
#[derive(Debug)]
pub struct SyncResult {
    /// Estimated offset in seconds; the audio-correlation path computes it as
    /// the best sample offset divided by the envelope sample rate.
    pub offset_seconds: f32,
    /// Confidence in the estimate; the audio-correlation path reports the
    /// correlation peak when it exceeds the configured threshold, else `0.0`.
    pub confidence: f32,
    /// Strategy that produced this result.
    pub method_used: SyncMethod,
    /// Highest correlation value found among the searched offsets.
    pub correlation_peak: f32,
}
29
/// Strategy used to obtain a synchronization offset.
///
/// The enum is a plain tag without payload, so it is `Copy` and comparable;
/// callers can write `result.method_used == SyncMethod::AudioCorrelation`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SyncMethod {
    /// Offset found by cross-correlating the audio envelope with the
    /// subtitle activity signal.
    AudioCorrelation,
    /// Offset supplied manually.
    ///
    /// NOTE(review): no code visible here produces this variant; confirm the
    /// exact meaning with the caller that sets it.
    ManualOffset,
    /// Offset derived by pattern matching.
    ///
    /// NOTE(review): no code visible here produces this variant; confirm the
    /// exact meaning with the caller that sets it.
    PatternMatching,
}
37
38impl SyncEngine {
39    /// 建立同步引擎
40    pub fn new(config: SyncConfig) -> Self {
41        Self {
42            audio_analyzer: AudioAnalyzer::new(16000),
43            config,
44        }
45    }
46
47    /// 自動同步字幕
48    pub async fn sync_subtitle(
49        &self,
50        video_path: &Path,
51        subtitle: &Subtitle,
52    ) -> Result<SyncResult> {
53        let audio_envelope = self.audio_analyzer.extract_envelope(video_path).await?;
54        let _dialogue_segments = self
55            .audio_analyzer
56            .detect_dialogue(&audio_envelope, self.config.dialogue_threshold);
57
58        let subtitle_signal = self.generate_subtitle_signal(
59            subtitle,
60            audio_envelope.duration,
61            audio_envelope.sample_rate,
62        );
63        let correlation_result =
64            self.calculate_cross_correlation(&audio_envelope, &subtitle_signal)?;
65
66        Ok(correlation_result)
67    }
68
69    fn generate_subtitle_signal(
70        &self,
71        subtitle: &Subtitle,
72        total_duration: f32,
73        sample_rate: u32,
74    ) -> Vec<f32> {
75        let sample_rate = sample_rate as f32;
76        let signal_length = (total_duration * sample_rate) as usize;
77        let mut signal = vec![0.0; signal_length];
78
79        for entry in &subtitle.entries {
80            let start = (entry.start_time.as_secs_f32() * sample_rate) as usize;
81            let end = (entry.end_time.as_secs_f32() * sample_rate) as usize;
82            let range_end = end.min(signal_length);
83            signal[start..range_end].iter_mut().for_each(|v| *v = 1.0);
84        }
85
86        signal
87    }
88
89    fn calculate_cross_correlation(
90        &self,
91        audio_envelope: &AudioEnvelope,
92        subtitle_signal: &[f32],
93    ) -> Result<SyncResult> {
94        let max_offset_samples =
95            (self.config.max_offset_seconds * audio_envelope.sample_rate as f32) as i32;
96        let mut best_offset = 0;
97        let mut best_correlation = 0.0;
98
99        for offset in -max_offset_samples..=max_offset_samples {
100            let corr = self.calculate_correlation_at_offset(
101                &audio_envelope.samples,
102                subtitle_signal,
103                offset,
104            );
105            if corr > best_correlation {
106                best_correlation = corr;
107                best_offset = offset;
108            }
109        }
110
111        let offset_seconds = best_offset as f32 / audio_envelope.sample_rate as f32;
112        let confidence = if best_correlation > self.config.correlation_threshold {
113            best_correlation
114        } else {
115            0.0
116        };
117
118        Ok(SyncResult {
119            offset_seconds,
120            confidence,
121            method_used: SyncMethod::AudioCorrelation,
122            correlation_peak: best_correlation,
123        })
124    }
125
126    fn calculate_correlation_at_offset(
127        &self,
128        audio_signal: &[f32],
129        subtitle_signal: &[f32],
130        offset: i32,
131    ) -> f32 {
132        let audio_len = audio_signal.len() as i32;
133        let subtitle_len = subtitle_signal.len() as i32;
134        let mut sum_product = 0.0;
135        let mut sum_audio_sq = 0.0;
136        let mut sum_sub_sq = 0.0;
137        let mut count = 0;
138
139        for i in 0..audio_len {
140            let j = i + offset;
141            if j >= 0 && j < subtitle_len {
142                let a = audio_signal[i as usize];
143                let s = subtitle_signal[j as usize];
144                sum_product += a * s;
145                sum_audio_sq += a * a;
146                sum_sub_sq += s * s;
147                count += 1;
148            }
149        }
150
151        if count == 0 || sum_audio_sq == 0.0 || sum_sub_sq == 0.0 {
152            return 0.0;
153        }
154
155        sum_product / (sum_audio_sq.sqrt() * sum_sub_sq.sqrt())
156    }
157
158    /// 套用同步偏移到字幕
159    pub fn apply_sync_offset(&self, subtitle: &mut Subtitle, offset_seconds: f32) -> Result<()> {
160        let offset_dur = std::time::Duration::from_secs_f32(offset_seconds.abs());
161        for entry in &mut subtitle.entries {
162            if offset_seconds >= 0.0 {
163                entry.start_time += offset_dur;
164                entry.end_time += offset_dur;
165            } else if entry.start_time > offset_dur {
166                entry.start_time -= offset_dur;
167                entry.end_time -= offset_dur;
168            } else {
169                let rem = offset_dur - entry.start_time;
170                entry.start_time = std::time::Duration::ZERO;
171                if entry.end_time > rem {
172                    entry.end_time -= rem;
173                } else {
174                    entry.end_time = std::time::Duration::ZERO;
175                }
176            }
177        }
178        Ok(())
179    }
180}