oximedia_align/
audio_align.rs

1//! Audio-to-video alignment utilities.
2//!
3//! Provides tools for synchronising audio tracks to video using clap detection,
4//! waveform cross-correlation, and drift computation.
5
6use serde::{Deserialize, Serialize};
7
8/// Method used to achieve audio/video synchronisation.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
10#[allow(dead_code)]
11pub enum SyncMethod {
12    /// Clapper-board detected in audio.
13    Clap,
14    /// Timecode embedded in the stream.
15    Timecode,
16    /// Waveform cross-correlation.
17    Waveform,
18    /// Manually specified offset.
19    Manual,
20}
21
22impl std::fmt::Display for SyncMethod {
23    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
24        match self {
25            Self::Clap => write!(f, "Clap"),
26            Self::Timecode => write!(f, "Timecode"),
27            Self::Waveform => write!(f, "Waveform"),
28            Self::Manual => write!(f, "Manual"),
29        }
30    }
31}
32
33/// The result of an audio/video synchronisation analysis.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct AudioVideoSync {
36    /// Milliseconds to add to the video presentation time to align it with the
37    /// audio.  Negative values mean the video must be shifted earlier.
38    pub video_offset_ms: i64,
39    /// Confidence in the sync measurement (0.0 – 1.0).
40    pub confidence: f64,
41    /// The method used to establish synchronisation.
42    pub method: SyncMethod,
43}
44
45impl AudioVideoSync {
46    /// Create a new sync result.
47    #[must_use]
48    pub fn new(video_offset_ms: i64, confidence: f64, method: SyncMethod) -> Self {
49        Self {
50            video_offset_ms,
51            confidence,
52            method,
53        }
54    }
55
56    /// Returns `true` when the sync confidence exceeds the given threshold.
57    #[must_use]
58    pub fn is_reliable(&self, threshold: f64) -> bool {
59        self.confidence >= threshold
60    }
61}
62
63/// Summary report describing the sync state between audio and video tracks.
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct SyncReport {
66    /// Duration of the video track in milliseconds.
67    pub video_duration_ms: u64,
68    /// Duration of the audio track in milliseconds.
69    pub audio_duration_ms: u64,
70    /// Sync offset at the beginning of the clip (milliseconds).
71    pub sync_offset_ms: i64,
72    /// Drift in parts-per-million between audio and video clocks.
73    pub drift_ppm: f64,
74}
75
76impl SyncReport {
77    /// Create a new sync report.
78    #[must_use]
79    pub fn new(
80        video_duration_ms: u64,
81        audio_duration_ms: u64,
82        sync_offset_ms: i64,
83        drift_ppm: f64,
84    ) -> Self {
85        Self {
86            video_duration_ms,
87            audio_duration_ms,
88            sync_offset_ms,
89            drift_ppm,
90        }
91    }
92
93    /// `true` when the drift magnitude is small enough to be negligible
94    /// (less than 1 ppm absolute).
95    #[must_use]
96    pub fn is_in_sync(&self) -> bool {
97        self.drift_ppm.abs() < 1.0
98    }
99
100    /// Difference in duration (audio minus video) in milliseconds.
101    #[must_use]
102    pub fn duration_delta_ms(&self) -> i64 {
103        self.audio_duration_ms as i64 - self.video_duration_ms as i64
104    }
105}
106
107// ── Clap detection ────────────────────────────────────────────────────────────
108
/// Detect a sharp transient (clap) in a mono audio stream.
///
/// The onset strength at each position is the positive part of the change in
/// a ~10 ms moving average of `|sample|`.  Two consecutive averaging windows
/// differ only by the sample that enters and the sample that leaves, so that
/// change equals `(|entering| - |leaving|) / window`.  It is evaluated
/// directly, in a single pass and without intermediate buffers.
///
/// # Arguments
///
/// * `samples` – Normalised f64 samples in [-1.0, 1.0].
/// * `sample_rate` – Samples per second.
///
/// # Returns
///
/// The timestamp (in milliseconds from the start) of the strongest onset, or
/// `None` when:
///
/// * `samples` is empty or `sample_rate` is zero,
/// * the signal is not longer than one smoothing window, or
/// * the strongest onset is below 5 % of what a full-scale step would produce.
///
/// When several positions share the maximum strength the earliest one is
/// reported.
#[must_use]
pub fn detect_clap(samples: &[f64], sample_rate: u32) -> Option<u64> {
    if samples.is_empty() || sample_rate == 0 {
        return None;
    }

    // ~10 ms smoothing window, never shorter than one sample.
    let window = (sample_rate as usize / 100).max(1);
    let window_len = window as f64;

    // `entering[i]` is the sample that joins the window when it slides from
    // position `i` to `i + 1`; `samples[i]` is the one that drops out.
    // A signal shorter than the window has no complete window at all.
    let entering = samples.get(window..)?;

    let mut best: Option<(usize, f64)> = None;
    for (idx, (leaving, arriving)) in samples.iter().zip(entering).enumerate() {
        // `f64::max` discards NaN, so a NaN sample yields zero onset strength.
        let strength = ((arriving.abs() - leaving.abs()) / window_len).max(0.0);
        // Strict comparison keeps the earliest position among equal maxima.
        if best.map_or(true, |(_, top)| strength > top) {
            best = Some((idx, strength));
        }
    }
    let (best_idx, best_val) = best?;

    // Require the transient to be significant relative to the smoothing window.
    // A full-scale step produces an onset of 1/window, so the threshold is set
    // at 5 % of that.
    let min_onset = 0.05 / window_len;
    if best_val < min_onset {
        return None;
    }

    // The onset at `best_idx` is caused by the sample at `best_idx + window`.
    let sample_idx = best_idx + window;
    let ms = (sample_idx as u64 * 1000) / u64::from(sample_rate);
    Some(ms)
}
164
165// ── Cross-correlation ─────────────────────────────────────────────────────────
166
/// Compute the full (linear) cross-correlation of two f32 arrays.
///
/// Output element `k` holds `sum_n a[n] * b[n + lag]` for
/// `lag = k - (a.len() - 1)`, so lags run from `-(a.len() - 1)` to
/// `b.len() - 1`.
///
/// The output has length `a.len() + b.len() - 1`.
/// The zero-lag element is at index `a.len() - 1`; for equal-length inputs
/// this is the centre element and coincides with `b.len() - 1`.
///
/// Returns an empty vector when either input is empty.
#[must_use]
pub fn cross_correlate_waveforms(a: &[f32], b: &[f32]) -> Vec<f32> {
    if a.is_empty() || b.is_empty() {
        return Vec::new();
    }

    let len = a.len() + b.len() - 1;
    let mut result = vec![0.0_f32; len];

    // The pair (a[i], b[j]) has lag j - i and therefore lands on index
    // j - i + (a.len() - 1).  For a fixed `i` that is a contiguous run of
    // `b.len()` slots starting at `a.len() - 1 - i`, always inside `result`.
    for (i, &ai) in a.iter().enumerate() {
        let base = a.len() - 1 - i;
        for (slot, &bj) in result[base..base + b.len()].iter_mut().zip(b) {
            *slot += ai * bj;
        }
    }

    result
}

/// Find the lag (in samples) at which two waveforms are best aligned.
///
/// Returns the lag `d` that maximises `sum_n a[n] * b[n + d]`: a feature
/// found at index `n` in `a` appears at index `n + d` in `b`.  Positive `d`
/// means the content of `b` is delayed relative to `a`.
///
/// When several lags share the maximum correlation the largest lag wins.
/// A lag that does not fit in an `i32` saturates at the `i32` limits.
///
/// Returns 0 when either slice is empty.
#[must_use]
pub fn find_max_correlation_offset(a: &[f32], b: &[f32]) -> i32 {
    if a.is_empty() || b.is_empty() {
        return 0;
    }

    let corr = cross_correlate_waveforms(a, b);

    // Peak index in the correlation vector (NaN compares as equal).
    let peak_idx = corr
        .iter()
        .enumerate()
        .max_by(|(_, x), (_, y)| x.partial_cmp(y).unwrap_or(std::cmp::Ordering::Equal))
        .map_or(0, |(i, _)| i);

    // The zero-lag index in the full cross-correlation is `a.len() - 1`.
    let zero_lag = a.len() - 1;
    let lag = peak_idx as i64 - zero_lag as i64;
    i32::try_from(lag).unwrap_or(if lag < 0 { i32::MIN } else { i32::MAX })
}
220
221// ── Drift computation ─────────────────────────────────────────────────────────
222
/// Estimate the clock drift between audio and video, in parts-per-million.
///
/// The drift is the change in sync offset over the clip divided by the clip
/// duration, scaled by one million.
///
/// # Arguments
///
/// * `start_offset_ms` – Sync offset measured at the beginning of the clip.
/// * `end_offset_ms` – Sync offset measured at the end of the clip.
/// * `duration_ms` – Duration of the clip in milliseconds.
///
/// # Returns
///
/// Drift in ppm.  The value is positive when the offset grew over the clip
/// (`end_offset_ms > start_offset_ms`), which by this module's convention
/// means the audio clock runs faster than the video clock.  Returns 0.0 when
/// `duration_ms` is zero.
#[must_use]
pub fn compute_drift(start_offset_ms: i64, end_offset_ms: i64, duration_ms: u64) -> f64 {
    match duration_ms {
        // No elapsed time: drift is undefined, report none.
        0 => 0.0,
        span_ms => {
            let offset_change_ms = (end_offset_ms - start_offset_ms) as f64;
            let fraction = offset_change_ms / span_ms as f64;
            fraction * 1_000_000.0
        }
    }
}
244
245// ── Spectral (phase-correlation) audio alignment ──────────────────────────────
246
/// Tuning parameters for [`spectral_align`].
#[derive(Debug, Clone)]
pub struct SpectralAlignConfig {
    /// Minimum transform length.  The signals are zero-padded to the next
    /// power of two that is at least this value and at least as long as the
    /// longer input.
    pub fft_size: usize,
    /// Largest lag magnitude to consider, in samples.  `None` searches every
    /// lag the transform can represent.
    pub max_lag: Option<usize>,
}

impl Default for SpectralAlignConfig {
    /// An 8192-point minimum transform with an unrestricted lag search.
    fn default() -> Self {
        Self {
            max_lag: None,
            fft_size: 8192,
        }
    }
}
266
/// Outcome of a [`spectral_align`] run.
#[derive(Debug, Clone)]
pub struct SpectralAlignResult {
    /// Detected offset in samples.  Positive means `b` is delayed relative to
    /// `a` (the shared content appears that many samples later in `b`);
    /// negative means `a` is the delayed signal.
    pub offset_samples: i32,
    /// Value of the phase-normalised correlation at the selected lag
    /// (higher = sharper match).
    pub peak_value: f64,
    /// Confidence score in [0, 1].
    pub confidence: f64,
}
278
279/// Find the alignment offset between two audio signals using phase correlation
280/// in the frequency domain.
281///
282/// This is the spectral equivalent of time-domain cross-correlation: we compute
283/// the normalised cross-power spectrum and inverse-FFT it to get the
284/// generalised cross-correlation (GCC-PHAT), then pick the lag with the
285/// largest peak.
286///
287/// Phase correlation is more robust than plain cross-correlation for signals
288/// with different amplitude envelopes (e.g. different microphone gains)
289/// because it whitens the magnitude spectrum.
290///
291/// # Arguments
292///
293/// * `a` -- First audio signal (normalised f32 samples).
294/// * `b` -- Second audio signal (normalised f32 samples).
295/// * `config` -- Spectral alignment configuration.
296///
297/// # Returns
298///
299/// A [`SpectralAlignResult`] with the detected offset.  Returns offset 0 with
300/// confidence 0 if either signal is empty.
301#[must_use]
302pub fn spectral_align(a: &[f32], b: &[f32], config: &SpectralAlignConfig) -> SpectralAlignResult {
303    if a.is_empty() || b.is_empty() {
304        return SpectralAlignResult {
305            offset_samples: 0,
306            peak_value: 0.0,
307            confidence: 0.0,
308        };
309    }
310
311    // Determine FFT size: next power-of-two >= max(len_a, len_b, config.fft_size)
312    let min_len = a.len().max(b.len()).max(config.fft_size);
313    let n = min_len.next_power_of_two();
314
315    // Zero-pad both signals to length n
316    let mut ra = vec![0.0_f64; n];
317    let mut ia = vec![0.0_f64; n];
318    for (i, &v) in a.iter().enumerate() {
319        ra[i] = f64::from(v);
320    }
321
322    let mut rb = vec![0.0_f64; n];
323    let mut ib = vec![0.0_f64; n];
324    for (i, &v) in b.iter().enumerate() {
325        rb[i] = f64::from(v);
326    }
327
328    // Forward FFT of both signals
329    fft_in_place(&mut ra, &mut ia, false);
330    fft_in_place(&mut rb, &mut ib, false);
331
332    // Compute cross-power spectrum with smoothed phase normalisation.
333    // We use a regularised version: R(k) = A(k) * conj(B(k)) / (|A(k)*conj(B(k))| + eps)
334    // where eps prevents division by zero for zero-padded regions.
335    // This is a mild form of GCC-PHAT that retains some magnitude weighting
336    // for better performance with zero-padded signals.
337    let mut cr = vec![0.0_f64; n];
338    let mut ci = vec![0.0_f64; n];
339
340    // Compute a regularisation threshold based on average magnitude
341    let mut sum_mag = 0.0_f64;
342    for k in 0..n {
343        let xr = ra[k] * rb[k] + ia[k] * ib[k];
344        let xi = ia[k] * rb[k] - ra[k] * ib[k];
345        sum_mag += (xr * xr + xi * xi).sqrt();
346    }
347    let eps = (sum_mag / n as f64) * 0.01 + 1e-15;
348
349    for k in 0..n {
350        // A * conj(B) = (ra+j*ia)*(rb-j*ib) = (ra*rb+ia*ib) + j*(ia*rb-ra*ib)
351        let xr = ra[k] * rb[k] + ia[k] * ib[k];
352        let xi = ia[k] * rb[k] - ra[k] * ib[k];
353        let mag = (xr * xr + xi * xi).sqrt();
354        let denom = mag + eps;
355        cr[k] = xr / denom;
356        ci[k] = xi / denom;
357    }
358
359    // Inverse FFT to get generalised cross-correlation
360    fft_in_place(&mut cr, &mut ci, true);
361
362    // Search for peak within the allowed lag range
363    let max_lag = config.max_lag.unwrap_or(n / 2);
364    let max_lag = max_lag.min(n / 2);
365
366    let mut best_idx = 0usize;
367    let mut best_val = f64::NEG_INFINITY;
368
369    // Positive lags: indices 0..max_lag
370    for i in 0..max_lag.min(n) {
371        if cr[i] > best_val {
372            best_val = cr[i];
373            best_idx = i;
374        }
375    }
376    // Negative lags: indices n-max_lag..n
377    let start = if max_lag < n { n - max_lag } else { 0 };
378    for i in start..n {
379        if cr[i] > best_val {
380            best_val = cr[i];
381            best_idx = i;
382        }
383    }
384
385    // Convert index to signed lag
386    let offset = if best_idx <= n / 2 {
387        best_idx as i32
388    } else {
389        best_idx as i32 - n as i32
390    };
391
392    // Compute confidence as the peak value relative to the RMS of the
393    // correlation (a sharp peak means high confidence).
394    let rms = (cr.iter().map(|v| v * v).sum::<f64>() / n as f64).sqrt();
395    let confidence = if rms > 1e-15 {
396        (best_val / (rms * (n as f64).sqrt())).clamp(0.0, 1.0)
397    } else {
398        0.0
399    };
400
401    // Negate so that a positive result means "b is delayed (should be shifted
402    // later)" which matches the documented convention.
403    SpectralAlignResult {
404        offset_samples: -offset,
405        peak_value: best_val,
406        confidence,
407    }
408}
409
410// ── Radix-2 Cooley-Tukey FFT ─────────────────────────────────────────────────
411
/// Radix-2 Cooley-Tukey transform performed in place on split real/imaginary
/// buffers.
///
/// With `inverse == false` the forward transform (negative exponent) is
/// computed; with `inverse == true` the positive exponent is used and the
/// result is scaled by `1 / n`, so a forward/inverse pair restores the input.
///
/// `re` and `im` must have the same length, which must be a power of two
/// (both checked with debug assertions only).  Lengths 0 and 1 are left
/// untouched.
fn fft_in_place(re: &mut [f64], im: &mut [f64], inverse: bool) {
    let n = re.len();
    debug_assert_eq!(n, im.len());
    if n <= 1 {
        return;
    }
    debug_assert!(n.is_power_of_two());

    // Reorder the input into bit-reversed index order.  `rev` is maintained
    // as the bit-reversal of `idx` by incrementing it from the top bit down.
    let mut rev = 0usize;
    for idx in 0..n {
        if idx < rev {
            re.swap(idx, rev);
            im.swap(idx, rev);
        }
        let mut bit = n >> 1;
        while bit >= 1 && rev >= bit {
            rev -= bit;
            bit >>= 1;
        }
        rev += bit;
    }

    // Butterfly passes over blocks of doubling size.
    let direction: f64 = if inverse { 1.0 } else { -1.0 };
    let mut span = 2;
    while span <= n {
        let half = span / 2;
        let theta = direction * std::f64::consts::PI * 2.0 / span as f64;
        let (step_re, step_im) = (theta.cos(), theta.sin());

        for base in (0..n).step_by(span) {
            // The twiddle factor starts at 1 and is rotated by `theta` after
            // every butterfly.
            let (mut tw_re, mut tw_im) = (1.0_f64, 0.0_f64);
            for k in 0..half {
                let lo = base + k;
                let hi = lo + half;
                let prod_re = tw_re * re[hi] - tw_im * im[hi];
                let prod_im = tw_re * im[hi] + tw_im * re[hi];
                re[hi] = re[lo] - prod_re;
                im[hi] = im[lo] - prod_im;
                re[lo] += prod_re;
                im[lo] += prod_im;
                let rotated_re = tw_re * step_re - tw_im * step_im;
                tw_im = tw_re * step_im + tw_im * step_re;
                tw_re = rotated_re;
            }
        }
        span <<= 1;
    }

    // The inverse transform carries the 1/n normalisation.
    if inverse {
        let scale = 1.0 / n as f64;
        for value in re.iter_mut().chain(im.iter_mut()) {
            *value *= scale;
        }
    }
}
480
#[cfg(test)]
mod tests {
    use super::*;

    // ── SyncMethod ────────────────────────────────────────────────────────────

    /// Every variant renders as its own name.
    #[test]
    fn test_sync_method_display() {
        let expected = [
            (SyncMethod::Clap, "Clap"),
            (SyncMethod::Timecode, "Timecode"),
            (SyncMethod::Waveform, "Waveform"),
            (SyncMethod::Manual, "Manual"),
        ];
        for (method, label) in expected {
            assert_eq!(method.to_string(), label);
        }
    }

    // ── AudioVideoSync ────────────────────────────────────────────────────────

    /// Confidence above the threshold is reliable.
    #[test]
    fn test_audio_video_sync_is_reliable_pass() {
        assert!(AudioVideoSync::new(100, 0.9, SyncMethod::Clap).is_reliable(0.8));
    }

    /// Confidence below the threshold is not reliable.
    #[test]
    fn test_audio_video_sync_is_reliable_fail() {
        assert!(!AudioVideoSync::new(100, 0.5, SyncMethod::Waveform).is_reliable(0.8));
    }

    /// The constructor stores its arguments unchanged.
    #[test]
    fn test_audio_video_sync_fields() {
        let AudioVideoSync {
            video_offset_ms,
            method,
            ..
        } = AudioVideoSync::new(-250, 0.75, SyncMethod::Timecode);
        assert_eq!(video_offset_ms, -250);
        assert_eq!(method, SyncMethod::Timecode);
    }

    // ── SyncReport ────────────────────────────────────────────────────────────

    /// Audio 33 ms longer than video gives a delta of +33.
    #[test]
    fn test_sync_report_duration_delta() {
        let report = SyncReport::new(60_000, 60_033, 0, 0.55);
        assert_eq!(report.duration_delta_ms(), 33);
    }

    /// Drift below 1 ppm counts as in sync.
    #[test]
    fn test_sync_report_is_in_sync_true() {
        assert!(SyncReport::new(60_000, 60_000, 0, 0.1).is_in_sync());
    }

    /// Drift of 5 ppm does not count as in sync.
    #[test]
    fn test_sync_report_is_in_sync_false() {
        assert!(!SyncReport::new(60_000, 60_000, 0, 5.0).is_in_sync());
    }

    // ── detect_clap ───────────────────────────────────────────────────────────

    #[test]
    fn test_detect_clap_empty() {
        assert!(detect_clap(&[], 48000).is_none());
    }

    #[test]
    fn test_detect_clap_zero_sample_rate() {
        assert!(detect_clap(&[0.0_f64; 100], 0).is_none());
    }

    /// A silent signal contains no significant transient.
    #[test]
    fn test_detect_clap_silent_signal() {
        let silence = vec![0.0_f64; 48000];
        assert!(detect_clap(&silence, 48000).is_none());
    }

    /// A ~10 ms (500-sample) full-scale burst at the one-second mark is found.
    #[test]
    fn test_detect_clap_finds_transient() {
        let mut samples = vec![0.01_f64; 48000 * 2];
        samples[48000..48500].fill(1.0);

        let ms = detect_clap(&samples, 48000).expect("ms should be valid");
        // Accept anything strictly between 800 ms and 1300 ms to leave room
        // for the smoothing window.
        assert!(ms > 800 && ms < 1300, "timestamp={ms}");
    }

    // ── cross_correlate_waveforms ─────────────────────────────────────────────

    #[test]
    fn test_cross_correlate_empty() {
        assert!(cross_correlate_waveforms(&[], &[1.0]).is_empty());
    }

    /// Output length is `a.len() + b.len() - 1`.
    #[test]
    fn test_cross_correlate_output_length() {
        let corr = cross_correlate_waveforms(&[1.0_f32; 5], &[1.0_f32; 3]);
        assert_eq!(corr.len(), 5 + 3 - 1);
    }

    /// Two identical three-sample impulses peak at index 2 (zero lag).
    #[test]
    fn test_cross_correlate_identical_unit_impulse() {
        let impulse = [0.0_f32, 1.0, 0.0];
        let corr = cross_correlate_waveforms(&impulse, &impulse);
        let (peak_idx, _) = corr
            .iter()
            .enumerate()
            .max_by(|(_, x), (_, y)| x.partial_cmp(y).expect("partial_cmp should succeed"))
            .expect("test expectation failed");
        assert_eq!(peak_idx, 2);
    }

    // ── find_max_correlation_offset ───────────────────────────────────────────

    #[test]
    fn test_find_max_correlation_offset_zero_lag() {
        let signal = [0.0_f32, 0.0, 1.0, 0.0, 0.0];
        assert_eq!(find_max_correlation_offset(&signal, &signal), 0);
    }

    /// `b` is `a` shifted right by two samples, so the lag is +2.
    #[test]
    fn test_find_max_correlation_offset_shifted() {
        let a = [0.0_f32, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0];
        let b = [0.0_f32, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0];
        assert_eq!(find_max_correlation_offset(&a, &b), 2);
    }

    #[test]
    fn test_find_max_correlation_offset_empty() {
        assert_eq!(find_max_correlation_offset(&[], &[]), 0);
    }

    // ── compute_drift ─────────────────────────────────────────────────────────

    #[test]
    fn test_compute_drift_zero_duration() {
        assert_eq!(compute_drift(0, 100, 0), 0.0);
    }

    #[test]
    fn test_compute_drift_no_drift() {
        assert_eq!(compute_drift(50, 50, 60_000), 0.0);
    }

    /// 100 ms of drift over 100 000 ms is 1000 ppm.
    #[test]
    fn test_compute_drift_known_value() {
        let ppm = compute_drift(0, 100, 100_000);
        assert!((ppm - 1000.0).abs() < 1e-6, "ppm={ppm}");
    }

    /// A shrinking offset gives negative drift.
    #[test]
    fn test_compute_drift_negative() {
        let ppm = compute_drift(100, 0, 100_000);
        assert!((ppm + 1000.0).abs() < 1e-6, "ppm={ppm}");
    }

    // ── FFT ──────────────────────────────────────────────────────────────────

    /// Forward followed by inverse transform restores the input.
    #[test]
    fn test_fft_roundtrip() {
        let n = 16;
        let original: Vec<f64> = (0..n).map(|i| (i as f64 * 0.3).sin()).collect();
        let mut re = original.clone();
        let mut im = vec![0.0_f64; n];

        fft_in_place(&mut re, &mut im, false);
        fft_in_place(&mut re, &mut im, true);

        for (i, (&orig, &recovered)) in original.iter().zip(re.iter()).enumerate() {
            assert!(
                (orig - recovered).abs() < 1e-10,
                "FFT roundtrip mismatch at {i}: {orig} vs {recovered}"
            );
        }
    }

    /// A constant signal transforms to a single DC bin of height `n`.
    #[test]
    fn test_fft_dc_component() {
        let n = 8;
        let mut re = vec![1.0_f64; n];
        let mut im = vec![0.0_f64; n];

        fft_in_place(&mut re, &mut im, false);

        assert!((re[0] - n as f64).abs() < 1e-10);
        for (i, value) in re.iter().enumerate().skip(1) {
            assert!(value.abs() < 1e-10, "bin {i} should be zero: {}", value);
        }
    }

    // ── Spectral alignment ──────────────────────────────────────────────────

    #[test]
    fn test_spectral_align_empty() {
        let result = spectral_align(&[], &[1.0], &SpectralAlignConfig::default());
        assert_eq!(result.offset_samples, 0);
        assert_eq!(result.confidence, 0.0);
    }

    #[test]
    fn test_spectral_align_identical_signals() {
        let signal: Vec<f32> = (0..256).map(|i| (i as f32 * 0.1).sin()).collect();
        let config = SpectralAlignConfig {
            fft_size: 512,
            max_lag: Some(64),
        };

        let result = spectral_align(&signal, &signal, &config);
        assert_eq!(
            result.offset_samples, 0,
            "identical signals should have zero offset"
        );
        assert!(result.peak_value > 0.0, "peak should be positive");
    }

    /// `b` is `a` delayed by ten samples, so the offset is about +10.
    #[test]
    fn test_spectral_align_known_shift() {
        let n: usize = 1024;
        let shift: usize = 10;
        // A rich signal with several frequency components.
        let signal: Vec<f32> = (0..n)
            .map(|i| {
                let t = i as f32;
                (t * 0.05).sin()
                    + 0.5 * (t * 0.13).sin()
                    + 0.3 * (t * 0.21).cos()
                    + 0.2 * (t * 0.37).sin()
            })
            .collect();

        // `a` is the signal itself; `b` starts with `shift` zeros followed by
        // the signal, i.e. a delayed copy.
        let a_sig = signal.clone();
        let mut b_sig = vec![0.0_f32; n];
        b_sig[shift..].copy_from_slice(&signal[..n - shift]);

        let config = SpectralAlignConfig {
            fft_size: 2048,
            max_lag: Some(64),
        };
        let result = spectral_align(&a_sig, &b_sig, &config);
        assert!(
            (result.offset_samples - shift as i32).abs() <= 2,
            "expected offset ~{shift}, got {}",
            result.offset_samples
        );
    }

    /// `a` is `b` delayed by eight samples, so the offset is about -8.
    #[test]
    fn test_spectral_align_negative_shift() {
        let n: usize = 2048;
        let shift: usize = 8;
        let signal: Vec<f32> = (0..n)
            .map(|i| {
                let t = i as f32;
                (t * 0.07).sin()
                    + 0.5 * (t * 0.19).cos()
                    + 0.3 * (t * 0.31).sin()
                    + 0.2 * (t * 0.47).cos()
            })
            .collect();

        // `b` is the signal itself; `a` is the delayed copy.
        let b_sig = signal.clone();
        let mut a_sig = vec![0.0_f32; n];
        a_sig[shift..].copy_from_slice(&signal[..n - shift]);

        let config = SpectralAlignConfig {
            fft_size: 4096,
            max_lag: Some(64),
        };
        let result = spectral_align(&a_sig, &b_sig, &config);
        assert!(
            (result.offset_samples + shift as i32).abs() <= 2,
            "expected offset ~-{shift}, got {}",
            result.offset_samples
        );
    }

    #[test]
    fn test_spectral_align_config_default() {
        let SpectralAlignConfig { fft_size, max_lag } = SpectralAlignConfig::default();
        assert_eq!(fft_size, 8192);
        assert!(max_lag.is_none());
    }
}
oximedia_align/audio_align.rs

oximedia_align/
audio_align.rs