subx_cli/core/sync/
engine.rs

1//! Subtitle synchronization engine using audio analysis and pattern matching.
2//!
3//! This module provides `SyncEngine` and related types to align subtitle timing
4//! with audio tracks based on correlation and dialogue analysis.
5//!
6//! # Examples
7//!
8//! ```rust
9//! use subx_cli::core::sync::engine::{SyncEngine, SyncConfig};
10//! let config = SyncConfig { max_offset_seconds: 5.0, correlation_threshold: 0.8, dialogue_threshold: 0.5, min_dialogue_length: 1.0 };
11//! let engine = SyncEngine::new(config);
12//! ```
13use crate::Result;
14use crate::core::formats::Subtitle;
15use crate::services::audio::{AudioAnalyzer, AudioEnvelope};
16use std::path::Path;
17
18/// Synchronization engine for aligning subtitles with audio tracks.
19pub struct SyncEngine {
20    audio_analyzer: AudioAnalyzer,
21    config: SyncConfig,
22}
23
/// Configuration parameters for the subtitle synchronization process.
///
/// Controls the offset search range and the thresholds used while matching
/// subtitles against audio. Values compare field by field with `==`; as with
/// any `f32`, a `NaN` field makes two configurations unequal.
#[derive(Debug, Clone, PartialEq)]
pub struct SyncConfig {
    /// Maximum time offset to search for synchronization (in seconds).
    ///
    /// The cross-correlation pass scans offsets from minus this value to plus
    /// this value.
    pub max_offset_seconds: f32,
    /// Minimum correlation threshold for accepting a sync match.
    ///
    /// The correlation peak must be strictly greater than this value for the
    /// result to carry a non-zero confidence.
    pub correlation_threshold: f32,
    /// Threshold for detecting dialogue in audio analysis; handed unchanged to
    /// the audio analyzer's dialogue detection.
    pub dialogue_threshold: f32,
    /// Minimum length required for dialogue segments (in seconds).
    pub min_dialogue_length: f32,
}
39
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::formats::{Subtitle, SubtitleEntry, SubtitleFormatType, SubtitleMetadata};
    use std::time::Duration;

    /// Builds an engine from the four `SyncConfig` values, in field order.
    fn engine(
        max_offset_seconds: f32,
        correlation_threshold: f32,
        dialogue_threshold: f32,
        min_dialogue_length: f32,
    ) -> SyncEngine {
        SyncEngine::new(SyncConfig {
            max_offset_seconds,
            correlation_threshold,
            dialogue_threshold,
            min_dialogue_length,
        })
    }

    /// Wraps `entries` in an SRT subtitle with default metadata.
    fn srt(entries: Vec<SubtitleEntry>) -> Subtitle {
        Subtitle {
            entries,
            metadata: SubtitleMetadata::default(),
            format: SubtitleFormatType::Srt,
        }
    }

    /// Whole-second `Duration` shorthand.
    fn secs(whole_seconds: u64) -> Duration {
        Duration::from_secs(whole_seconds)
    }

    /// A positive offset moves both ends of an entry later.
    #[test]
    fn test_apply_sync_offset_positive() {
        let sync = engine(0.0, 0.0, 0.0, 0.0);
        let mut subtitle = srt(vec![SubtitleEntry::new(
            1,
            secs(1),
            secs(2),
            "test".to_owned(),
        )]);

        sync.apply_sync_offset(&mut subtitle, 2.0).unwrap();

        let shifted = &subtitle.entries[0];
        assert_eq!(shifted.start_time, secs(3));
        assert_eq!(shifted.end_time, secs(4));
    }

    /// A negative offset smaller than the start time moves both ends earlier.
    #[test]
    fn test_apply_sync_offset_negative() {
        let sync = engine(0.0, 0.0, 0.0, 0.0);
        let mut subtitle = srt(vec![SubtitleEntry::new(
            1,
            secs(5),
            secs(7),
            "test".to_owned(),
        )]);

        sync.apply_sync_offset(&mut subtitle, -2.0).unwrap();

        let shifted = &subtitle.entries[0];
        assert_eq!(shifted.start_time, secs(3));
        assert_eq!(shifted.end_time, secs(5));
    }

    /// A `SyncConfig` literal stores exactly the values it was given.
    #[test]
    fn test_sync_config_creation() {
        let cfg = SyncConfig {
            max_offset_seconds: 5.0,
            correlation_threshold: 0.8,
            dialogue_threshold: 0.5,
            min_dialogue_length: 1.0,
        };

        assert_eq!(cfg.max_offset_seconds, 5.0);
        assert_eq!(cfg.correlation_threshold, 0.8);
        assert_eq!(cfg.dialogue_threshold, 0.5);
        assert_eq!(cfg.min_dialogue_length, 1.0);
    }

    /// Identical signals correlate (almost) perfectly at offset 0 and worse at offset 1.
    #[test]
    fn test_calculate_correlation_at_offset() {
        let sync = engine(1.0, 0.7, 0.3, 0.5);

        let audio_signal: [f32; 5] = [0.5, 0.8, 0.2, 0.9, 0.1];
        let subtitle_signal = audio_signal;

        let correlation = sync.calculate_correlation_at_offset(&audio_signal, &subtitle_signal, 0);
        assert!(
            correlation > 0.99,
            "Perfect correlation should be close to 1.0, got: {}",
            correlation
        );

        let correlation_offset =
            sync.calculate_correlation_at_offset(&audio_signal, &subtitle_signal, 1);
        assert!(
            correlation_offset < correlation,
            "Correlation with offset should be lower"
        );
    }

    /// At 1 Hz every sample covers one second, so each one-second cue sets exactly one sample.
    #[test]
    fn test_generate_subtitle_signal() {
        let sync = engine(5.0, 0.8, 0.5, 1.0);
        let subtitle = srt(vec![
            SubtitleEntry::new(1, secs(1), secs(2), "Test 1".to_owned()),
            SubtitleEntry::new(2, secs(4), secs(5), "Test 2".to_owned()),
        ]);

        let signal = sync.generate_subtitle_signal(&subtitle, 6.0, 1);

        // 6 seconds * 1 Hz
        assert_eq!(signal.len(), 6);

        assert_eq!(signal[0], 0.0); // before the first cue
        assert_eq!(signal[1], 1.0); // first cue (1-2s)
        assert_eq!(signal[2], 0.0); // gap
        assert_eq!(signal[3], 0.0); // gap continues
        assert_eq!(signal[4], 1.0); // second cue (4-5s)
        assert_eq!(signal[5], 0.0); // after the last cue
    }

    /// A `SyncResult` literal stores exactly the values it was given.
    #[test]
    fn test_sync_result_creation() {
        let outcome = SyncResult {
            offset_seconds: 2.5,
            confidence: 0.85,
            method_used: SyncMethod::AudioCorrelation,
            correlation_peak: 0.92,
        };

        assert_eq!(outcome.offset_seconds, 2.5);
        assert_eq!(outcome.confidence, 0.85);
        assert!(matches!(outcome.method_used, SyncMethod::AudioCorrelation));
        assert_eq!(outcome.correlation_peak, 0.92);
    }

    /// `SyncEngine::new` keeps the configuration it was handed.
    #[test]
    fn test_engine_initialization() {
        let cfg = SyncConfig {
            max_offset_seconds: 10.0,
            correlation_threshold: 0.6,
            dialogue_threshold: 0.4,
            min_dialogue_length: 2.0,
        };

        let sync = SyncEngine::new(cfg);

        assert_eq!(sync.config.max_offset_seconds, 10.0);
        assert_eq!(sync.config.correlation_threshold, 0.6);
    }

    /// Zero offset is a no-op; a negative offset beyond the start clamps the start to zero.
    #[test]
    fn test_apply_sync_offset_edge_cases() {
        let sync = engine(5.0, 0.8, 0.5, 1.0);
        let mut subtitle = srt(vec![SubtitleEntry::new(
            1,
            secs(2),
            secs(4),
            "Test".to_owned(),
        )]);

        // Zero offset leaves the entry untouched.
        sync.apply_sync_offset(&mut subtitle, 0.0).unwrap();
        assert_eq!(subtitle.entries[0].start_time, secs(2));
        assert_eq!(subtitle.entries[0].end_time, secs(4));

        // Shifting the same entry 3s earlier overshoots its 2s start.
        sync.apply_sync_offset(&mut subtitle, -3.0).unwrap();
        assert_eq!(subtitle.entries[0].start_time, Duration::ZERO);
        assert_eq!(subtitle.entries[0].end_time, secs(3));
    }

    /// Scanning offsets finds the one that lines up the two peaks.
    #[test]
    fn test_correlation_with_misalignment() {
        let sync = engine(2.0, 0.5, 0.3, 0.5);

        // Peak at index 3.
        let audio_signal: [f32; 7] = [0.1, 0.2, 0.1, 0.9, 0.1, 0.2, 0.1];
        // Peak at index 1.
        let subtitle_signal: [f32; 5] = [0.1, 0.9, 0.1, 0.2, 0.1];

        // Keep the first offset whose correlation strictly beats the running best.
        let (best_offset, best_corr) = (-3..=3).fold((0_i32, 0.0_f32), |best, offset| {
            let corr =
                sync.calculate_correlation_at_offset(&audio_signal, &subtitle_signal, offset);
            if corr > best.1 { (offset, corr) } else { best }
        });

        // Audio index i is paired with subtitle index i + offset, so the audio
        // peak (3) meets the subtitle peak (1) at offset -2.
        assert_eq!(best_offset, -2);
        assert!(
            best_corr > 0.5,
            "Best correlation should be reasonably high: {}",
            best_corr
        );
    }

    /// Overlapping cues still produce a plain 0/1 signal.
    #[test]
    fn test_generate_subtitle_signal_overlapping() {
        let sync = engine(5.0, 0.8, 0.5, 1.0);
        let subtitle = srt(vec![
            SubtitleEntry::new(1, secs(1), secs(3), "First".to_owned()),
            SubtitleEntry::new(2, secs(2), secs(4), "Second".to_owned()),
        ]);

        let signal = sync.generate_subtitle_signal(&subtitle, 5.0, 1);

        assert_eq!(signal[0], 0.0); // before any cue
        assert_eq!(signal[1], 1.0); // first cue starts
        assert_eq!(signal[2], 1.0); // both cues active
        assert_eq!(signal[3], 1.0); // second cue continues
        assert_eq!(signal[4], 0.0); // after all cues
    }

    /// Degenerate inputs (all zero, empty, no overlap) all yield a correlation of 0.0.
    #[test]
    fn test_correlation_edge_cases() {
        let sync = engine(1.0, 0.5, 0.3, 0.5);

        let zero_signal = [0.0_f32; 5];
        let correlation = sync.calculate_correlation_at_offset(&zero_signal, &zero_signal, 0);
        assert_eq!(
            correlation, 0.0,
            "Correlation of zero signals should be 0.0"
        );

        let empty_signal: [f32; 0] = [];
        let correlation = sync.calculate_correlation_at_offset(&empty_signal, &empty_signal, 0);
        assert_eq!(
            correlation, 0.0,
            "Correlation of empty signals should be 0.0"
        );

        // Offset 10 on a 3-sample signal leaves no overlapping samples.
        let signal: [f32; 3] = [1.0, 2.0, 3.0];
        let correlation = sync.calculate_correlation_at_offset(&signal, &signal, 10);
        assert_eq!(
            correlation, 0.0,
            "Correlation with out-of-bounds offset should be 0.0"
        );
    }
}
358
359/// Result of the subtitle synchronization process.
360///
361/// Contains detailed information about the synchronization outcome,
362/// including timing adjustments and confidence metrics.
363#[derive(Debug)]
364pub struct SyncResult {
365    /// Time offset in seconds to apply to subtitle timing
366    pub offset_seconds: f32,
367    /// Confidence level of the synchronization result (0.0 to 1.0)
368    pub confidence: f32,
369    /// Method used to achieve synchronization
370    pub method_used: SyncMethod,
371    /// Peak correlation value found during analysis
372    pub correlation_peak: f32,
373}
374
/// Available methods for synchronizing subtitles with audio.
///
/// Identifies which approach produced a timing offset. The type is a plain
/// fieldless enum, so it is cheap to copy and can be compared with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SyncMethod {
    /// Correlation-based synchronization using audio analysis.
    AudioCorrelation,
    /// Manual offset specified by the user.
    ManualOffset,
    /// Pattern matching between subtitle and audio timing.
    PatternMatching,
}
388
389impl SyncEngine {
390    /// Creates a new `SyncEngine` instance with the given configuration.
391    pub fn new(config: SyncConfig) -> Self {
392        Self {
393            audio_analyzer: AudioAnalyzer::new(16000),
394            config,
395        }
396    }
397
398    /// Automatically adjusts subtitle timing to match the audio in the video file.
399    ///
400    /// # Arguments
401    ///
402    /// * `video_path` - Path to the source video or audio file.
403    /// * `subtitle` - The subtitle object to synchronize.
404    pub async fn sync_subtitle(
405        &self,
406        video_path: &Path,
407        subtitle: &Subtitle,
408    ) -> Result<SyncResult> {
409        let audio_envelope = self.audio_analyzer.extract_envelope(video_path).await?;
410        let _dialogue_segments = self
411            .audio_analyzer
412            .detect_dialogue(&audio_envelope, self.config.dialogue_threshold);
413
414        let subtitle_signal = self.generate_subtitle_signal(
415            subtitle,
416            audio_envelope.duration,
417            audio_envelope.sample_rate,
418        );
419        let correlation_result =
420            self.calculate_cross_correlation(&audio_envelope, &subtitle_signal)?;
421
422        Ok(correlation_result)
423    }
424
425    fn generate_subtitle_signal(
426        &self,
427        subtitle: &Subtitle,
428        total_duration: f32,
429        sample_rate: u32,
430    ) -> Vec<f32> {
431        let sample_rate = sample_rate as f32;
432        let signal_length = (total_duration * sample_rate) as usize;
433        let mut signal = vec![0.0; signal_length];
434
435        for entry in &subtitle.entries {
436            let start = (entry.start_time.as_secs_f32() * sample_rate) as usize;
437            let end = (entry.end_time.as_secs_f32() * sample_rate) as usize;
438            let range_end = end.min(signal_length);
439            signal[start..range_end].iter_mut().for_each(|v| *v = 1.0);
440        }
441
442        signal
443    }
444
445    fn calculate_cross_correlation(
446        &self,
447        audio_envelope: &AudioEnvelope,
448        subtitle_signal: &[f32],
449    ) -> Result<SyncResult> {
450        let max_offset_samples =
451            (self.config.max_offset_seconds * audio_envelope.sample_rate as f32) as i32;
452        let mut best_offset = 0;
453        let mut best_correlation = 0.0;
454
455        for offset in -max_offset_samples..=max_offset_samples {
456            let corr = self.calculate_correlation_at_offset(
457                &audio_envelope.samples,
458                subtitle_signal,
459                offset,
460            );
461            if corr > best_correlation {
462                best_correlation = corr;
463                best_offset = offset;
464            }
465        }
466
467        let offset_seconds = best_offset as f32 / audio_envelope.sample_rate as f32;
468        let confidence = if best_correlation > self.config.correlation_threshold {
469            best_correlation
470        } else {
471            0.0
472        };
473
474        Ok(SyncResult {
475            offset_seconds,
476            confidence,
477            method_used: SyncMethod::AudioCorrelation,
478            correlation_peak: best_correlation,
479        })
480    }
481
482    fn calculate_correlation_at_offset(
483        &self,
484        audio_signal: &[f32],
485        subtitle_signal: &[f32],
486        offset: i32,
487    ) -> f32 {
488        let audio_len = audio_signal.len() as i32;
489        let subtitle_len = subtitle_signal.len() as i32;
490        let mut sum_product = 0.0;
491        let mut sum_audio_sq = 0.0;
492        let mut sum_sub_sq = 0.0;
493        let mut count = 0;
494
495        for i in 0..audio_len {
496            let j = i + offset;
497            if j >= 0 && j < subtitle_len {
498                let a = audio_signal[i as usize];
499                let s = subtitle_signal[j as usize];
500                sum_product += a * s;
501                sum_audio_sq += a * a;
502                sum_sub_sq += s * s;
503                count += 1;
504            }
505        }
506
507        if count == 0 || sum_audio_sq == 0.0 || sum_sub_sq == 0.0 {
508            return 0.0;
509        }
510
511        sum_product / (sum_audio_sq.sqrt() * sum_sub_sq.sqrt())
512    }
513
514    /// Apply sync offset to subtitle
515    pub fn apply_sync_offset(&self, subtitle: &mut Subtitle, offset_seconds: f32) -> Result<()> {
516        let offset_dur = std::time::Duration::from_secs_f32(offset_seconds.abs());
517        for entry in &mut subtitle.entries {
518            if offset_seconds >= 0.0 {
519                entry.start_time += offset_dur;
520                entry.end_time += offset_dur;
521            } else if entry.start_time > offset_dur {
522                entry.start_time -= offset_dur;
523                entry.end_time -= offset_dur;
524            } else {
525                let rem = offset_dur - entry.start_time;
526                entry.start_time = std::time::Duration::ZERO;
527                if entry.end_time > rem {
528                    entry.end_time -= rem;
529                } else {
530                    entry.end_time = std::time::Duration::ZERO;
531                }
532            }
533        }
534        Ok(())
535    }
536}