subx_cli/core/sync/
engine.rs

1//! Refactored sync engine with VAD (Voice Activity Detection) support.
2//!
3//! This module provides unified subtitle synchronization functionality using
4//! local VAD (Voice Activity Detection) for voice detection and sync offset calculation.
5
6use serde::{Deserialize, Serialize};
7use serde_json::json;
8use std::path::Path;
9use std::time::{Duration, Instant};
10
11use crate::config::SyncConfig;
12use crate::core::formats::Subtitle;
13use crate::services::vad::VadSyncDetector;
14use crate::{Result, error::SubXError};
15
16/// Unified sync engine based on VAD voice detection.
17///
18/// This engine provides automatic subtitle synchronization using Voice Activity
19/// Detection (VAD) to analyze audio tracks and calculate optimal sync offsets.
20pub struct SyncEngine {
21    config: SyncConfig,
22    vad_detector: Option<VadSyncDetector>,
23}
24
25impl SyncEngine {
26    /// Create a new sync engine instance.
27    ///
28    /// # Arguments
29    ///
30    /// * `config` - Sync configuration containing VAD settings and thresholds
31    ///
32    /// # Returns
33    ///
34    /// A new sync engine instance with initialized VAD detector if enabled.
35    pub fn new(config: SyncConfig) -> Result<Self> {
36        let vad_detector = if config.vad.enabled {
37            match VadSyncDetector::new(config.vad.clone()) {
38                Ok(det) => Some(det),
39                Err(e) => {
40                    log::warn!("VAD initialization failed: {}", e);
41                    None
42                }
43            }
44        } else {
45            None
46        };
47
48        if vad_detector.is_none() {
49            return Err(SubXError::config(
50                "VAD detector is required but not available",
51            ));
52        }
53
54        Ok(Self {
55            config,
56            vad_detector,
57        })
58    }
59
60    /// Detect sync offset using automatic or specified method.
61    ///
62    /// # Arguments
63    ///
64    /// * `audio_path` - Path to the audio file for analysis
65    /// * `subtitle` - Subtitle data to synchronize
66    /// * `method` - Optional sync method, defaults to automatic detection
67    ///
68    /// # Returns
69    ///
70    /// Sync result containing offset, confidence, and processing metadata.
71    pub async fn detect_sync_offset(
72        &self,
73        audio_path: &Path,
74        subtitle: &Subtitle,
75        method: Option<SyncMethod>,
76    ) -> Result<SyncResult> {
77        let start = Instant::now();
78        let m = method.unwrap_or_else(|| self.determine_default_method());
79        let mut res = match m {
80            SyncMethod::Auto | SyncMethod::LocalVad => {
81                self.vad_detect_sync_offset(audio_path, subtitle).await?
82            }
83            SyncMethod::Manual => {
84                return Err(SubXError::config("Manual method requires explicit offset"));
85            }
86        };
87        res.processing_duration = start.elapsed();
88        Ok(res)
89    }
90
91    async fn auto_detect_sync_offset(
92        &self,
93        audio_path: &Path,
94        subtitle: &Subtitle,
95    ) -> Result<SyncResult> {
96        // Auto mode uses VAD
97        if self.vad_detector.is_some() {
98            return self.vad_detect_sync_offset(audio_path, subtitle).await;
99        }
100        Err(SubXError::audio_processing(
101            "No detector available in auto mode",
102        ))
103    }
104
105    /// Apply manual offset to subtitle timing.
106    ///
107    /// # Arguments
108    ///
109    /// * `subtitle` - Mutable subtitle data to modify
110    /// * `offset_seconds` - Offset in seconds (positive delays, negative advances)
111    ///
112    /// # Returns
113    ///
114    /// Sync result with the applied offset and full confidence.
115    ///
116    /// # Errors
117    ///
118    /// Returns an error if the offset exceeds the configured maximum.
119    pub fn apply_manual_offset(
120        &self,
121        subtitle: &mut Subtitle,
122        offset_seconds: f32,
123    ) -> Result<SyncResult> {
124        // Validate offset against max_offset_seconds configuration
125        if offset_seconds.abs() > self.config.max_offset_seconds {
126            return Err(SubXError::config(format!(
127                "Offset {:.2}s exceeds maximum allowed value {:.2}s. Please check the sync.max_offset_seconds configuration or use a smaller offset.",
128                offset_seconds, self.config.max_offset_seconds
129            )));
130        }
131
132        let start = Instant::now();
133        for entry in &mut subtitle.entries {
134            entry.start_time = entry
135                .start_time
136                .checked_add(Duration::from_secs_f32(offset_seconds.abs()))
137                .or_else(|| {
138                    if offset_seconds < 0.0 {
139                        entry
140                            .start_time
141                            .checked_sub(Duration::from_secs_f32(-offset_seconds))
142                    } else {
143                        None
144                    }
145                })
146                .ok_or_else(|| {
147                    SubXError::audio_processing("Invalid offset results in negative time")
148                })?;
149            entry.end_time = entry
150                .end_time
151                .checked_add(Duration::from_secs_f32(offset_seconds.abs()))
152                .or_else(|| {
153                    if offset_seconds < 0.0 {
154                        entry
155                            .end_time
156                            .checked_sub(Duration::from_secs_f32(-offset_seconds))
157                    } else {
158                        None
159                    }
160                })
161                .ok_or_else(|| {
162                    SubXError::audio_processing("Invalid offset results in negative time")
163                })?;
164        }
165        Ok(SyncResult {
166            offset_seconds,
167            confidence: 1.0,
168            method_used: SyncMethod::Manual,
169            correlation_peak: 1.0,
170            additional_info: Some(json!({
171                "applied_offset": offset_seconds,
172                "entries_modified": subtitle.entries.len(),
173            })),
174            processing_duration: start.elapsed(),
175            warnings: Vec::new(),
176        })
177    }
178
179    fn determine_default_method(&self) -> SyncMethod {
180        match self.config.default_method.as_str() {
181            "vad" => SyncMethod::LocalVad,
182            _ => SyncMethod::Auto,
183        }
184    }
185
186    async fn vad_detect_sync_offset(
187        &self,
188        audio_path: &Path,
189        subtitle: &Subtitle,
190    ) -> Result<SyncResult> {
191        let det = self
192            .vad_detector
193            .as_ref()
194            .ok_or_else(|| SubXError::audio_processing("VAD detector not available"))?;
195
196        let mut result = det.detect_sync_offset(audio_path, subtitle, 0).await?; // analysis_window_seconds no longer used
197
198        // Validate detected offset against max_offset_seconds configuration
199        if result.offset_seconds.abs() > self.config.max_offset_seconds {
200            // Provide warning but don't completely fail, allow user to decide
201            result.warnings.push(format!(
202                "Detected offset {:.2}s exceeds configured maximum value {:.2}s. Consider checking audio quality or adjusting sync.max_offset_seconds configuration.",
203                result.offset_seconds, self.config.max_offset_seconds
204            ));
205
206            // Optionally: clamp to maximum value (preserving sign)
207            let sign = if result.offset_seconds >= 0.0 {
208                1.0
209            } else {
210                -1.0
211            };
212            let original_offset = result.offset_seconds;
213            result.offset_seconds = sign * self.config.max_offset_seconds;
214
215            result.additional_info = Some(json!({
216                "original_offset": original_offset,
217                "clamped_offset": result.offset_seconds,
218                "reason": "Exceeded max_offset_seconds configuration"
219            }));
220        }
221
222        Ok(result)
223    }
224}
225
226/// Sync method enumeration.
227///
228/// Defines the available methods for subtitle synchronization,
229/// from automatic detection to manual offset specification.
230#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
231pub enum SyncMethod {
232    /// Automatic method selection (currently VAD only).
233    Auto,
234    /// Local VAD (Voice Activity Detection) processing.
235    LocalVad,
236    /// Manual offset specification.
237    Manual,
238}
239
240/// Synchronization result structure.
241///
242/// Contains the complete results of subtitle synchronization analysis,
243/// including calculated offset, confidence metrics, and processing metadata.
244#[derive(Debug, Clone)]
245pub struct SyncResult {
246    /// Calculated time offset in seconds
247    pub offset_seconds: f32,
248    /// Confidence level of the detection (0.0-1.0)
249    pub confidence: f32,
250    /// Synchronization method that was used
251    pub method_used: SyncMethod,
252    /// Peak correlation value from analysis
253    pub correlation_peak: f32,
254    /// Additional method-specific information
255    pub additional_info: Option<serde_json::Value>,
256    /// Time taken to complete the analysis
257    pub processing_duration: Duration,
258    /// Any warnings generated during processing
259    pub warnings: Vec<String>,
260}
261
262/// Method selection strategy for synchronization analysis.
263///
264/// Defines preferences and fallback behavior for automatic method selection
265/// when multiple synchronization approaches are available.
266#[derive(Debug, Clone)]
267pub struct MethodSelectionStrategy {
268    /// Preferred methods in order of preference
269    pub preferred_methods: Vec<SyncMethod>,
270    /// Minimum confidence threshold for accepting results
271    pub min_confidence_threshold: f32,
272    /// Whether to allow fallback to alternative methods
273    pub allow_fallback: bool,
274    /// Maximum time to spend on analysis attempts
275    pub max_attempt_duration: u32,
276}
277
// Unit tests covering the core behavior of the sync engine.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{TestConfigBuilder, TestConfigService, service::ConfigService};
    use crate::core::formats::{Subtitle, SubtitleEntry, SubtitleFormatType, SubtitleMetadata};
    use std::time::Duration;

    /// Build a single-entry SRT subtitle spanning 10s to 12s.
    fn create_test_subtitle() -> Subtitle {
        let only_entry = SubtitleEntry::new(
            1,
            Duration::from_secs(10),
            Duration::from_secs(12),
            "Test subtitle".to_string(),
        );
        Subtitle {
            entries: vec![only_entry],
            metadata: SubtitleMetadata::default(),
            format: SubtitleFormatType::Srt,
        }
    }

    /// An engine can be constructed when VAD is enabled in the configuration.
    #[tokio::test]
    async fn test_sync_engine_creation() {
        let service = TestConfigService::new(
            TestConfigBuilder::new()
                .with_vad_enabled(true)
                .build_config(),
        );
        let sync_config = service.get_config().unwrap().sync;
        assert!(SyncEngine::new(sync_config).is_ok());
    }

    /// A positive manual offset delays the entry and is reported in the result.
    #[tokio::test]
    async fn test_manual_offset_application() {
        let service = TestConfigService::new(TestConfigBuilder::new().build_config());
        let engine = SyncEngine::new(service.get_config().unwrap().sync).unwrap();

        let mut subtitle = create_test_subtitle();
        let start_before = subtitle.entries[0].start_time;

        let outcome = engine.apply_manual_offset(&mut subtitle, 2.5).unwrap();
        assert_eq!(outcome.offset_seconds, 2.5);
        assert_eq!(outcome.method_used, SyncMethod::Manual);
        assert_eq!(outcome.confidence, 1.0);

        assert_eq!(
            subtitle.entries[0].start_time,
            start_before + Duration::from_secs_f32(2.5)
        );
    }

    /// The configured method string maps to the expected `SyncMethod`.
    #[tokio::test]
    async fn test_determine_default_method() {
        let cases = vec![("vad", SyncMethod::LocalVad), ("unknown", SyncMethod::Auto)];

        for (method_name, expected) in cases {
            let sync_config = TestConfigBuilder::new()
                .with_sync_method(method_name)
                .build_config()
                .sync;
            let engine = SyncEngine::new(sync_config).unwrap();
            assert_eq!(engine.determine_default_method(), expected);
        }
    }

    /// `MethodSelectionStrategy` can be built and its fields read back.
    #[tokio::test]
    async fn test_method_selection_strategy_struct() {
        let strategy = MethodSelectionStrategy {
            preferred_methods: vec![SyncMethod::LocalVad],
            min_confidence_threshold: 0.7,
            allow_fallback: true,
            max_attempt_duration: 60,
        };
        assert_eq!(strategy.preferred_methods.len(), 1);
        assert!(strategy.allow_fallback);
    }
}
352
/// Legacy sync configuration, retained only for backward compatibility.
///
/// This type is deprecated; see the deprecation note for the replacement.
#[deprecated(note = "Use new SyncConfig with Whisper and VAD support")]
pub struct OldSyncConfig {
    /// Largest offset, in seconds, searched during synchronization.
    pub max_offset_seconds: f32,
    /// Lowest correlation at which a sync result is accepted.
    pub correlation_threshold: f32,
    /// Threshold used to detect dialogue when identifying speech segments.
    pub dialogue_threshold: f32,
    /// Shortest dialogue segment, in seconds, that is considered.
    pub min_dialogue_length: f32,
}