1#![allow(dead_code)]
7#![allow(clippy::cast_precision_loss)]
8#![allow(clippy::too_many_arguments)]
9
10use serde::{Deserialize, Serialize};
11
/// Tight audio/video offset tolerance in milliseconds, used by the ITU checks in this module.
pub const ITU_TOLERANCE_MS: f64 = 45.0;

/// Looser audio/video offset tolerance in milliseconds, used by the "comfortable" checks.
pub const COMFORTABLE_TOLERANCE_MS: f64 = 90.0;
17
18#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
20pub struct AvOffset {
21 pub offset_ms: f64,
23 pub confidence: f64,
25 pub method: DetectionMethod,
27}
28
29impl AvOffset {
30 #[must_use]
32 pub fn new(offset_ms: f64, confidence: f64, method: DetectionMethod) -> Self {
33 Self {
34 offset_ms,
35 confidence,
36 method,
37 }
38 }
39
40 #[must_use]
42 pub fn to_samples(&self, sample_rate: u32) -> i64 {
43 (self.offset_ms * f64::from(sample_rate) / 1000.0).round() as i64
44 }
45
46 #[must_use]
48 pub fn to_frames(&self, fps: f64) -> f64 {
49 self.offset_ms * fps / 1000.0
50 }
51
52 #[must_use]
54 pub fn within_itu_tolerance(&self) -> bool {
55 self.offset_ms.abs() <= ITU_TOLERANCE_MS
56 }
57
58 #[must_use]
60 pub fn within_comfortable_tolerance(&self) -> bool {
61 self.offset_ms.abs() <= COMFORTABLE_TOLERANCE_MS
62 }
63}
64
65#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
67pub enum DetectionMethod {
68 AudioMotionCorrelation,
70 SpeechOnset,
72 MouthMovement,
74 ClapperBoard,
76 Manual,
78 Hybrid,
80}
81
/// Tunable parameters for [`LipSyncAnalyzer`].
#[derive(Debug, Clone)]
pub struct LipSyncConfig {
    /// Length of the correlation window in milliseconds.
    pub window_ms: f64,
    /// Largest lag, in milliseconds, searched on either side of zero.
    pub search_range_ms: f64,
    /// Minimum confidence a detection needs before the analyzer records it.
    pub min_confidence: f64,
    /// Sample rate in Hz used to convert between milliseconds and samples.
    pub sample_rate: u32,
    /// Frame rate in frames per second (stored only; not read by the code in this file).
    pub fps: f64,
}

impl Default for LipSyncConfig {
    /// 500 ms window, ±500 ms search range, 48 kHz, 25 fps, confidence threshold 0.6.
    fn default() -> Self {
        Self {
            fps: 25.0,
            sample_rate: 48000,
            min_confidence: Self::DEFAULT_MIN_CONFIDENCE,
            search_range_ms: 500.0,
            window_ms: 500.0,
        }
    }
}

impl LipSyncConfig {
    /// Confidence threshold applied by both [`LipSyncConfig::new`] and `Default`.
    const DEFAULT_MIN_CONFIDENCE: f64 = 0.6;

    /// Creates a configuration with the given timing parameters and the default
    /// confidence threshold of 0.6.
    #[must_use]
    pub fn new(window_ms: f64, search_range_ms: f64, sample_rate: u32, fps: f64) -> Self {
        Self {
            window_ms,
            search_range_ms,
            min_confidence: Self::DEFAULT_MIN_CONFIDENCE,
            sample_rate,
            fps,
        }
    }

    /// Correlation window length in samples, truncated towards zero.
    #[must_use]
    pub fn window_samples(&self) -> usize {
        self.ms_to_samples(self.window_ms)
    }

    /// Search range in samples, truncated towards zero.
    #[must_use]
    pub fn search_range_samples(&self) -> usize {
        self.ms_to_samples(self.search_range_ms)
    }

    /// Converts a duration in milliseconds to a sample count at `self.sample_rate`.
    ///
    /// The float-to-integer cast saturates, so negative or NaN durations become 0.
    fn ms_to_samples(&self, duration_ms: f64) -> usize {
        (duration_ms * f64::from(self.sample_rate) / 1000.0) as usize
    }
}
134
135#[derive(Debug, Clone, Copy)]
137pub struct LipSyncCorrection {
138 pub audio_delay_ms: f64,
140 pub video_delay_ms: f64,
142 pub needs_correction: bool,
144}
145
146impl LipSyncCorrection {
147 #[must_use]
150 pub fn from_offset(offset: &AvOffset, tolerance_ms: f64) -> Self {
151 if offset.offset_ms.abs() <= tolerance_ms {
152 return Self {
153 audio_delay_ms: 0.0,
154 video_delay_ms: 0.0,
155 needs_correction: false,
156 };
157 }
158
159 if offset.offset_ms > 0.0 {
161 Self {
163 audio_delay_ms: offset.offset_ms,
164 video_delay_ms: 0.0,
165 needs_correction: true,
166 }
167 } else {
168 Self {
170 audio_delay_ms: 0.0,
171 video_delay_ms: -offset.offset_ms,
172 needs_correction: true,
173 }
174 }
175 }
176
177 #[must_use]
179 pub fn magnitude_ms(&self) -> f64 {
180 self.audio_delay_ms + self.video_delay_ms
181 }
182}
183
184#[derive(Debug, Clone)]
186pub struct LipSyncAnalyzer {
187 config: LipSyncConfig,
188 offset_history: Vec<AvOffset>,
190}
191
192impl LipSyncAnalyzer {
193 #[must_use]
195 pub fn new(config: LipSyncConfig) -> Self {
196 Self {
197 config,
198 offset_history: Vec::new(),
199 }
200 }
201
202 pub fn detect_offset_from_envelopes(
204 &mut self,
205 audio_envelope: &[f32],
206 video_activity: &[f32],
207 ) -> Option<AvOffset> {
208 if audio_envelope.is_empty() || video_activity.is_empty() {
209 return None;
210 }
211
212 let max_lag = self
213 .config
214 .search_range_samples()
215 .min(audio_envelope.len() / 2);
216 let window = self.config.window_samples().min(audio_envelope.len());
217
218 let mut best_lag = 0i64;
219 let mut best_corr = f64::NEG_INFINITY;
220
221 for lag in -(max_lag as i64)..=(max_lag as i64) {
222 let corr = cross_correlate_at_lag(audio_envelope, video_activity, lag, window);
223 if corr > best_corr {
224 best_corr = corr;
225 best_lag = lag;
226 }
227 }
228
229 let audio_power: f64 = audio_envelope
231 .iter()
232 .map(|&x| f64::from(x) * f64::from(x))
233 .sum::<f64>()
234 / audio_envelope.len() as f64;
235 let video_power: f64 = video_activity
236 .iter()
237 .map(|&x| f64::from(x) * f64::from(x))
238 .sum::<f64>()
239 / video_activity.len() as f64;
240
241 let max_possible = (audio_power * video_power).sqrt() * window as f64;
242 let confidence = if max_possible > 0.0 {
243 (best_corr / max_possible).clamp(0.0, 1.0)
244 } else {
245 0.0
246 };
247
248 let offset_ms = best_lag as f64 / f64::from(self.config.sample_rate) * 1000.0;
249 let offset = AvOffset::new(
250 offset_ms,
251 confidence,
252 DetectionMethod::AudioMotionCorrelation,
253 );
254
255 if confidence >= self.config.min_confidence {
256 self.offset_history.push(offset);
257 }
258
259 Some(offset)
260 }
261
262 #[must_use]
264 pub fn median_offset(&self) -> Option<f64> {
265 if self.offset_history.is_empty() {
266 return None;
267 }
268 let mut offsets: Vec<f64> = self.offset_history.iter().map(|o| o.offset_ms).collect();
269 offsets.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
270 let mid = offsets.len() / 2;
271 Some(if offsets.len() % 2 == 0 {
272 (offsets[mid - 1] + offsets[mid]) / 2.0
273 } else {
274 offsets[mid]
275 })
276 }
277
278 #[must_use]
280 pub fn recommend_correction(&self, tolerance_ms: f64) -> Option<LipSyncCorrection> {
281 let median = self.median_offset()?;
282 let offset = AvOffset::new(median, 1.0, DetectionMethod::Hybrid);
283 Some(LipSyncCorrection::from_offset(&offset, tolerance_ms))
284 }
285
286 pub fn clear_history(&mut self) {
288 self.offset_history.clear();
289 }
290
291 #[must_use]
293 pub fn history_len(&self) -> usize {
294 self.offset_history.len()
295 }
296}
297
/// Sums `a[i] * b[i + lag]` over the first `window` indices of `a`.
///
/// The index range is limited to the shortest of `window`, `a.len()` and `b.len()`;
/// terms whose shifted index `i + lag` falls outside `b` are skipped. Products are
/// accumulated in `f64`.
fn cross_correlate_at_lag(a: &[f32], b: &[f32], lag: i64, window: usize) -> f64 {
    let span = window.min(a.len()).min(b.len());

    a[..span]
        .iter()
        .enumerate()
        .filter_map(|(i, &lhs)| {
            let shifted = i as i64 + lag;
            if shifted < 0 {
                return None;
            }
            b.get(shifted as usize)
                .map(|&rhs| f64::from(lhs) * f64::from(rhs))
        })
        // Explicit 0.0 seed and left-to-right accumulation, matching a plain running sum.
        .fold(0.0_f64, |acc, term| acc + term)
}
310
311#[derive(Debug, Clone, Copy)]
313pub struct ToleranceChecker {
314 pub itu_tolerance_ms: f64,
316 pub custom_tolerance_ms: f64,
318}
319
320impl ToleranceChecker {
321 #[must_use]
323 pub fn new(custom_tolerance_ms: f64) -> Self {
324 Self {
325 itu_tolerance_ms: ITU_TOLERANCE_MS,
326 custom_tolerance_ms,
327 }
328 }
329
330 #[must_use]
332 pub fn passes_itu(&self, offset_ms: f64) -> bool {
333 offset_ms.abs() <= self.itu_tolerance_ms
334 }
335
336 #[must_use]
338 pub fn passes_custom(&self, offset_ms: f64) -> bool {
339 offset_ms.abs() <= self.custom_tolerance_ms
340 }
341
342 #[must_use]
344 pub fn severity(&self, offset_ms: f64) -> SyncSeverity {
345 let abs_ms = offset_ms.abs();
346 if abs_ms <= ITU_TOLERANCE_MS {
347 SyncSeverity::None
348 } else if abs_ms <= COMFORTABLE_TOLERANCE_MS {
349 SyncSeverity::Minor
350 } else if abs_ms <= 200.0 {
351 SyncSeverity::Moderate
352 } else {
353 SyncSeverity::Severe
354 }
355 }
356}
357
358impl Default for ToleranceChecker {
359 fn default() -> Self {
360 Self::new(ITU_TOLERANCE_MS)
361 }
362}
363
/// Grade assigned to an offset by [`ToleranceChecker::severity`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SyncSeverity {
    /// Offset magnitude is within the ITU tolerance.
    None,
    /// Above the ITU tolerance but within [`COMFORTABLE_TOLERANCE_MS`].
    Minor,
    /// Above the comfortable tolerance but at most 200 ms.
    Moderate,
    /// Anything larger than 200 ms.
    Severe,
}
376
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_av_offset_creation() {
        let offset = AvOffset::new(20.0, 0.9, DetectionMethod::Manual);
        assert!((offset.offset_ms - 20.0).abs() < f64::EPSILON);
        assert!((offset.confidence - 0.9).abs() < f64::EPSILON);
        assert_eq!(offset.method, DetectionMethod::Manual);
    }

    #[test]
    fn test_av_offset_to_samples() {
        let offset = AvOffset::new(100.0, 0.9, DetectionMethod::Manual);
        assert_eq!(offset.to_samples(48000), 4800);
    }

    #[test]
    fn test_av_offset_to_frames() {
        let offset = AvOffset::new(40.0, 0.9, DetectionMethod::Manual);
        let frames = offset.to_frames(25.0);
        assert!((frames - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_av_offset_itu_tolerance() {
        let within = AvOffset::new(40.0, 0.9, DetectionMethod::Manual);
        assert!(within.within_itu_tolerance());

        let outside = AvOffset::new(50.0, 0.9, DetectionMethod::Manual);
        assert!(!outside.within_itu_tolerance());
    }

    #[test]
    fn test_av_offset_comfortable_tolerance() {
        let within = AvOffset::new(80.0, 0.9, DetectionMethod::Manual);
        assert!(within.within_comfortable_tolerance());

        let outside = AvOffset::new(100.0, 0.9, DetectionMethod::Manual);
        assert!(!outside.within_comfortable_tolerance());
    }

    #[test]
    fn test_lip_sync_config_default() {
        let config = LipSyncConfig::default();
        assert_eq!(config.sample_rate, 48000);
        assert!((config.fps - 25.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_lip_sync_config_window_samples() {
        let config = LipSyncConfig::default();
        assert_eq!(config.window_samples(), 24000);
    }

    #[test]
    fn test_lip_sync_correction_no_correction_needed() {
        let offset = AvOffset::new(10.0, 0.9, DetectionMethod::Manual);
        let correction = LipSyncCorrection::from_offset(&offset, 45.0);
        assert!(!correction.needs_correction);
    }

    #[test]
    fn test_lip_sync_correction_audio_ahead() {
        let offset = AvOffset::new(100.0, 0.9, DetectionMethod::Manual);
        let correction = LipSyncCorrection::from_offset(&offset, 45.0);
        assert!(correction.needs_correction);
        assert!(correction.audio_delay_ms > 0.0);
        assert_eq!(correction.video_delay_ms, 0.0);
    }

    #[test]
    fn test_lip_sync_correction_video_ahead() {
        let offset = AvOffset::new(-100.0, 0.9, DetectionMethod::Manual);
        let correction = LipSyncCorrection::from_offset(&offset, 45.0);
        assert!(correction.needs_correction);
        assert_eq!(correction.audio_delay_ms, 0.0);
        assert!(correction.video_delay_ms > 0.0);
    }

    #[test]
    fn test_lip_sync_correction_magnitude() {
        let offset = AvOffset::new(100.0, 0.9, DetectionMethod::Manual);
        let correction = LipSyncCorrection::from_offset(&offset, 45.0);
        assert!((correction.magnitude_ms() - 100.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_analyzer_detect_from_envelopes() {
        let config = LipSyncConfig::new(100.0, 200.0, 48000, 25.0);
        let mut analyzer = LipSyncAnalyzer::new(config);

        let n = 5000;
        let mut audio = vec![0.0f32; n];
        let mut video = vec![0.0f32; n];

        // The video burst sits exactly 100 samples after the audio burst.
        audio[1000] = 1.0;
        audio[1001] = 0.8;
        video[1100] = 1.0;
        video[1101] = 0.8;

        let result = analyzer.detect_offset_from_envelopes(&audio, &video);
        assert!(result.is_some());

        // Pin the detected lag instead of only checking that something came back.
        let detected = result.expect("presence asserted above");
        assert_eq!(detected.to_samples(48000), 100);
        assert_eq!(detected.method, DetectionMethod::AudioMotionCorrelation);
        assert!(detected.confidence >= 0.6);
        assert_eq!(analyzer.history_len(), 1);
    }

    #[test]
    fn test_analyzer_detect_from_empty_envelopes() {
        let mut analyzer = LipSyncAnalyzer::new(LipSyncConfig::default());
        assert!(analyzer.detect_offset_from_envelopes(&[], &[1.0]).is_none());
        assert!(analyzer.detect_offset_from_envelopes(&[1.0], &[]).is_none());
        assert_eq!(analyzer.history_len(), 0);
    }

    #[test]
    fn test_analyzer_median_offset_empty() {
        let analyzer = LipSyncAnalyzer::new(LipSyncConfig::default());
        assert!(analyzer.median_offset().is_none());
    }

    #[test]
    fn test_analyzer_median_offset_values() {
        let mut analyzer = LipSyncAnalyzer::new(LipSyncConfig::default());
        for &value in &[30.0, 10.0, 20.0] {
            analyzer
                .offset_history
                .push(AvOffset::new(value, 0.9, DetectionMethod::Manual));
        }
        assert_eq!(analyzer.median_offset(), Some(20.0));

        // An even count averages the two middle values.
        analyzer
            .offset_history
            .push(AvOffset::new(40.0, 0.9, DetectionMethod::Manual));
        assert_eq!(analyzer.median_offset(), Some(25.0));
    }

    #[test]
    fn test_analyzer_clear_history() {
        let config = LipSyncConfig::default();
        let mut analyzer = LipSyncAnalyzer::new(config);
        analyzer
            .offset_history
            .push(AvOffset::new(10.0, 0.9, DetectionMethod::Manual));
        assert_eq!(analyzer.history_len(), 1);
        analyzer.clear_history();
        assert_eq!(analyzer.history_len(), 0);
    }

    #[test]
    fn test_tolerance_checker_itu() {
        let checker = ToleranceChecker::default();
        assert!(checker.passes_itu(44.9));
        assert!(!checker.passes_itu(45.1));
    }

    #[test]
    fn test_tolerance_checker_severity() {
        let checker = ToleranceChecker::default();
        assert_eq!(checker.severity(30.0), SyncSeverity::None);
        assert_eq!(checker.severity(70.0), SyncSeverity::Minor);
        assert_eq!(checker.severity(150.0), SyncSeverity::Moderate);
        assert_eq!(checker.severity(250.0), SyncSeverity::Severe);
    }

    #[test]
    fn test_cross_correlate_at_lag() {
        let a = vec![1.0f32, 0.0, 0.0, 0.0, 0.0];
        let b = vec![0.0f32, 1.0, 0.0, 0.0, 0.0];
        let corr = cross_correlate_at_lag(&a, &b, 1, 5);
        assert!((corr - 1.0).abs() < 1e-6);
    }
}