subx_cli/core/sync/
engine.rs

1//! Refactored sync engine with VAD (Voice Activity Detection) support.
2//!
3//! This module provides unified subtitle synchronization functionality using
4//! local VAD (Voice Activity Detection) for voice detection and sync offset calculation.
5
6use log::{debug, warn};
7use serde::{Deserialize, Serialize};
8use serde_json::json;
9use std::path::Path;
10use std::time::{Duration, Instant};
11
12use crate::config::SyncConfig;
13use crate::core::formats::Subtitle;
14use crate::services::vad::VadSyncDetector;
15use crate::{Result, error::SubXError};
16
17/// Unified sync engine based on VAD voice detection.
18///
19/// This engine provides automatic subtitle synchronization using Voice Activity
20/// Detection (VAD) to analyze audio tracks and calculate optimal sync offsets.
21pub struct SyncEngine {
22    config: SyncConfig,
23    vad_detector: Option<VadSyncDetector>,
24}
25
26impl SyncEngine {
27    /// Create a new sync engine instance.
28    ///
29    /// # Arguments
30    ///
31    /// * `config` - Sync configuration containing VAD settings and thresholds
32    ///
33    /// # Returns
34    ///
35    /// A new sync engine instance with initialized VAD detector if enabled.
36    pub fn new(config: SyncConfig) -> Result<Self> {
37        let vad_detector = if config.vad.enabled {
38            match VadSyncDetector::new(config.vad.clone()) {
39                Ok(det) => {
40                    debug!(
41                        "[SyncEngine] VAD detector initialized successfully with config: {:?}",
42                        config.vad
43                    );
44                    Some(det)
45                }
46                Err(e) => {
47                    warn!("[SyncEngine] VAD initialization failed: {}", e);
48                    None
49                }
50            }
51        } else {
52            debug!("[SyncEngine] VAD is disabled in config");
53            None
54        };
55
56        if vad_detector.is_none() {
57            warn!("[SyncEngine] VAD detector is required but not available");
58            return Err(SubXError::config(
59                "VAD detector is required but not available",
60            ));
61        }
62
63        debug!("[SyncEngine] SyncEngine created with VAD detector");
64        Ok(Self {
65            config,
66            vad_detector,
67        })
68    }
69
70    /// Detect sync offset using automatic or specified method.
71    ///
72    /// # Arguments
73    ///
74    /// * `audio_path` - Path to the audio file for analysis
75    /// * `subtitle` - Subtitle data to synchronize
76    /// * `method` - Optional sync method, defaults to automatic detection
77    ///
78    /// # Returns
79    ///
80    /// Sync result containing offset, confidence, and processing metadata.
81    pub async fn detect_sync_offset(
82        &self,
83        audio_path: &Path,
84        subtitle: &Subtitle,
85        method: Option<SyncMethod>,
86    ) -> Result<SyncResult> {
87        debug!(
88            "[SyncEngine] detect_sync_offset called | audio_path: {:?}, subtitle entries: {}, method: {:?}",
89            audio_path,
90            subtitle.entries.len(),
91            method
92        );
93        let start = Instant::now();
94        let m = method.unwrap_or_else(|| self.determine_default_method());
95        debug!("[SyncEngine] Using sync method: {:?}", m);
96        let mut res = match m {
97            SyncMethod::Auto | SyncMethod::LocalVad => {
98                self.vad_detect_sync_offset(audio_path, subtitle).await?
99            }
100            SyncMethod::Manual => {
101                debug!("[SyncEngine] Manual method selected but not supported in this context");
102                return Err(SubXError::config("Manual method requires explicit offset"));
103            }
104        };
105        res.processing_duration = start.elapsed();
106        debug!(
107            "[SyncEngine] detect_sync_offset finished | offset_seconds: {:.3}, confidence: {:.3}, duration_ms: {}",
108            res.offset_seconds,
109            res.confidence,
110            res.processing_duration.as_millis()
111        );
112        Ok(res)
113    }
114
115    async fn auto_detect_sync_offset(
116        &self,
117        audio_path: &Path,
118        subtitle: &Subtitle,
119    ) -> Result<SyncResult> {
120        debug!(
121            "[SyncEngine] auto_detect_sync_offset called | audio_path: {:?}, subtitle entries: {}",
122            audio_path,
123            subtitle.entries.len()
124        );
125        // Auto mode uses VAD
126        if self.vad_detector.is_some() {
127            return self.vad_detect_sync_offset(audio_path, subtitle).await;
128        }
129        Err(SubXError::audio_processing(
130            "No detector available in auto mode",
131        ))
132    }
133
134    /// Apply manual offset to subtitle timing.
135    ///
136    /// # Arguments
137    ///
138    /// * `subtitle` - Mutable subtitle data to modify
139    /// * `offset_seconds` - Offset in seconds (positive delays, negative advances)
140    ///
141    /// # Returns
142    ///
143    /// Sync result with the applied offset and full confidence.
144    ///
145    /// # Errors
146    ///
147    /// Returns an error if the offset exceeds the configured maximum.
148    pub fn apply_manual_offset(
149        &self,
150        subtitle: &mut Subtitle,
151        offset_seconds: f32,
152    ) -> Result<SyncResult> {
153        debug!(
154            "[SyncEngine] apply_manual_offset called | offset_seconds: {:.3}, entries: {}",
155            offset_seconds,
156            subtitle.entries.len()
157        );
158        // Validate offset against max_offset_seconds configuration
159        if offset_seconds.abs() > self.config.max_offset_seconds {
160            warn!(
161                "[SyncEngine] Offset {:.2}s exceeds maximum allowed value {:.2}s. Aborting.",
162                offset_seconds, self.config.max_offset_seconds
163            );
164            return Err(SubXError::config(format!(
165                "Offset {:.2}s exceeds maximum allowed value {:.2}s. Please check the sync.max_offset_seconds configuration or use a smaller offset.",
166                offset_seconds, self.config.max_offset_seconds
167            )));
168        }
169
170        let start = Instant::now();
171        for entry in &mut subtitle.entries {
172            let offset_dur = Duration::from_secs_f32(offset_seconds.abs());
173            if offset_seconds >= 0.0 {
174                entry.start_time = entry.start_time.checked_add(offset_dur).ok_or_else(|| {
175                    SubXError::audio_processing("Invalid offset results in negative time")
176                })?;
177                entry.end_time = entry.end_time.checked_add(offset_dur).ok_or_else(|| {
178                    SubXError::audio_processing("Invalid offset results in negative time")
179                })?;
180            } else {
181                // For negative offsets, clamp times to zero instead of erroring on underflow
182                entry.start_time = if entry.start_time > offset_dur {
183                    entry.start_time - offset_dur
184                } else {
185                    Duration::ZERO
186                };
187                entry.end_time = if entry.end_time > offset_dur {
188                    entry.end_time - offset_dur
189                } else {
190                    Duration::ZERO
191                };
192            }
193        }
194        debug!(
195            "[SyncEngine] Manual offset applied to all entries | offset_seconds: {:.3}",
196            offset_seconds
197        );
198        Ok(SyncResult {
199            offset_seconds,
200            confidence: 1.0,
201            method_used: SyncMethod::Manual,
202            correlation_peak: 1.0,
203            additional_info: Some(json!({
204                "applied_offset": offset_seconds,
205                "entries_modified": subtitle.entries.len(),
206            })),
207            processing_duration: start.elapsed(),
208            warnings: Vec::new(),
209        })
210    }
211
212    fn determine_default_method(&self) -> SyncMethod {
213        debug!(
214            "[SyncEngine] determine_default_method called | config.default_method: {}",
215            self.config.default_method
216        );
217        match self.config.default_method.as_str() {
218            "vad" => SyncMethod::LocalVad,
219            _ => SyncMethod::Auto,
220        }
221    }
222
223    async fn vad_detect_sync_offset(
224        &self,
225        audio_path: &Path,
226        subtitle: &Subtitle,
227    ) -> Result<SyncResult> {
228        debug!(
229            "[SyncEngine] vad_detect_sync_offset called | audio_path: {:?}, subtitle entries: {}",
230            audio_path,
231            subtitle.entries.len()
232        );
233        let det = self
234            .vad_detector
235            .as_ref()
236            .ok_or_else(|| SubXError::audio_processing("VAD detector not available"))?;
237
238        let mut result = det.detect_sync_offset(audio_path, subtitle, 0).await?; // analysis_window_seconds no longer used
239
240        // Validate detected offset against max_offset_seconds configuration
241        if result.offset_seconds.abs() > self.config.max_offset_seconds {
242            warn!(
243                "[SyncEngine] Detected offset {:.2}s exceeds configured maximum value {:.2}s. Clamping and warning.",
244                result.offset_seconds, self.config.max_offset_seconds
245            );
246
247            // Provide warning but don't completely fail, allow user to decide
248            result.warnings.push(format!(
249                "Detected offset {:.2}s exceeds configured maximum value {:.2}s. Consider checking audio quality or adjusting sync.max_offset_seconds configuration.",
250                result.offset_seconds, self.config.max_offset_seconds
251            ));
252
253            // Optionally: clamp to maximum value (preserving sign)
254            let sign = if result.offset_seconds >= 0.0 {
255                1.0
256            } else {
257                -1.0
258            };
259            let original_offset = result.offset_seconds;
260            result.offset_seconds = sign * self.config.max_offset_seconds;
261
262            result.additional_info = Some(json!({
263                "original_offset": original_offset,
264                "clamped_offset": result.offset_seconds,
265                "reason": "Exceeded max_offset_seconds configuration"
266            }));
267        } else {
268            debug!(
269                "[SyncEngine] VAD sync offset detected | offset_seconds: {:.3}, confidence: {:.3}",
270                result.offset_seconds, result.confidence
271            );
272        }
273
274        Ok(result)
275    }
276}
277
278/// Sync method enumeration.
279///
280/// Defines the available methods for subtitle synchronization,
281/// from automatic detection to manual offset specification.
282#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
283pub enum SyncMethod {
284    /// Automatic method selection (currently VAD only).
285    Auto,
286    /// Local VAD (Voice Activity Detection) processing.
287    LocalVad,
288    /// Manual offset specification.
289    Manual,
290}
291
292/// Synchronization result structure.
293///
294/// Contains the complete results of subtitle synchronization analysis,
295/// including calculated offset, confidence metrics, and processing metadata.
296#[derive(Debug, Clone)]
297pub struct SyncResult {
298    /// Calculated time offset in seconds
299    pub offset_seconds: f32,
300    /// Confidence level of the detection (0.0-1.0)
301    pub confidence: f32,
302    /// Synchronization method that was used
303    pub method_used: SyncMethod,
304    /// Peak correlation value from analysis
305    pub correlation_peak: f32,
306    /// Additional method-specific information
307    pub additional_info: Option<serde_json::Value>,
308    /// Time taken to complete the analysis
309    pub processing_duration: Duration,
310    /// Any warnings generated during processing
311    pub warnings: Vec<String>,
312}
313
314/// Method selection strategy for synchronization analysis.
315///
316/// Defines preferences and fallback behavior for automatic method selection
317/// when multiple synchronization approaches are available.
318#[derive(Debug, Clone)]
319pub struct MethodSelectionStrategy {
320    /// Preferred methods in order of preference
321    pub preferred_methods: Vec<SyncMethod>,
322    /// Minimum confidence threshold for accepting results
323    pub min_confidence_threshold: f32,
324    /// Whether to allow fallback to alternative methods
325    pub allow_fallback: bool,
326    /// Maximum time to spend on analysis attempts
327    pub max_attempt_duration: u32,
328}
329
// Unit tests covering the core behavior of the sync engine.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{TestConfigBuilder, TestConfigService, service::ConfigService};
    use crate::core::formats::{Subtitle, SubtitleEntry, SubtitleFormatType, SubtitleMetadata};
    use std::time::Duration;

    /// Single-entry SRT subtitle spanning 10s..12s, shared by the offset tests.
    fn create_test_subtitle() -> Subtitle {
        let entry = SubtitleEntry::new(
            1,
            Duration::from_secs(10),
            Duration::from_secs(12),
            "Test subtitle".to_string(),
        );
        Subtitle {
            entries: vec![entry],
            metadata: SubtitleMetadata::default(),
            format: SubtitleFormatType::Srt,
        }
    }

    /// Engine built from the default test configuration, read back through the config service.
    fn engine_from_default_config() -> SyncEngine {
        let config_service = TestConfigService::new(TestConfigBuilder::new().build_config());
        SyncEngine::new(config_service.get_config().unwrap().sync).unwrap()
    }

    /// An engine can be constructed when VAD is explicitly enabled.
    #[tokio::test]
    async fn test_sync_engine_creation() {
        let config = TestConfigBuilder::new()
            .with_vad_enabled(true)
            .build_config();
        let config_service = TestConfigService::new(config);
        let sync_config = config_service.get_config().unwrap().sync;
        assert!(SyncEngine::new(sync_config).is_ok());
    }

    /// A positive manual offset delays the entry and is reported back unchanged.
    #[tokio::test]
    async fn test_manual_offset_application() {
        let engine = engine_from_default_config();
        let mut subtitle = create_test_subtitle();
        let start_before = subtitle.entries[0].start_time;

        let result = engine.apply_manual_offset(&mut subtitle, 2.5).unwrap();
        assert_eq!(result.offset_seconds, 2.5);
        assert_eq!(result.method_used, SyncMethod::Manual);
        assert_eq!(result.confidence, 1.0);

        assert_eq!(
            subtitle.entries[0].start_time,
            start_before + Duration::from_secs_f32(2.5)
        );
    }

    /// A negative manual offset advances the entry.
    #[tokio::test]
    async fn test_manual_offset_negative_application() {
        let engine = engine_from_default_config();
        let mut subtitle = create_test_subtitle();
        let start_before = subtitle.entries[0].start_time;

        let result = engine.apply_manual_offset(&mut subtitle, -2.5).unwrap();
        assert_eq!(result.offset_seconds, -2.5);

        assert_eq!(
            subtitle.entries[0].start_time,
            start_before - Duration::from_secs_f32(2.5)
        );
    }

    /// The configured method string maps to the expected `SyncMethod`.
    #[tokio::test]
    async fn test_determine_default_method() {
        let cases = [("vad", SyncMethod::LocalVad), ("unknown", SyncMethod::Auto)];

        for (config_value, expected_method) in cases {
            let config = TestConfigBuilder::new()
                .with_sync_method(config_value)
                .build_config();
            let engine = SyncEngine::new(config.sync).unwrap();
            assert_eq!(engine.determine_default_method(), expected_method);
        }
    }

    /// The strategy struct can be built and its fields read back.
    #[tokio::test]
    async fn test_method_selection_strategy_struct() {
        let strategy = MethodSelectionStrategy {
            preferred_methods: vec![SyncMethod::LocalVad],
            min_confidence_threshold: 0.7,
            allow_fallback: true,
            max_attempt_duration: 60,
        };
        assert_eq!(strategy.preferred_methods.len(), 1);
        assert!(strategy.allow_fallback);
    }
}