Skip to main content

oximedia_align/
frequency_align.rs

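//! Frequency-domain alignment for audio and video streams.
//!
//! Intended to analyse per-band energy and to compute and apply a temporal
//! shift that maximises correlation across a configurable set of frequency
//! bands. The current implementation is a stand-in: it runs a broadband
//! time-domain cross-correlation and does not yet use the configured bands.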
5
6#![allow(dead_code)]
7
/// A frequency band described by its centre frequency and its full width.
///
/// Both edges count as part of the band (see [`FrequencyBand::contains`]).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct FrequencyBand {
    /// Centre frequency in Hz.
    pub center_hz: f64,
    /// Full bandwidth of the band in Hz.
    pub bandwidth: f64,
}

impl FrequencyBand {
    /// Builds a band from a centre frequency and a full bandwidth, both in Hz.
    ///
    /// The values are stored exactly as given; nothing is validated.
    #[must_use]
    pub fn new(center_hz: f64, bandwidth: f64) -> Self {
        Self {
            center_hz,
            bandwidth,
        }
    }

    /// Full width of the band in Hz.
    #[must_use]
    pub fn bandwidth_hz(&self) -> f64 {
        self.bandwidth
    }

    /// Lower edge of the band: the centre minus half the bandwidth.
    #[must_use]
    pub fn lower_hz(&self) -> f64 {
        self.center_hz - self.half_width()
    }

    /// Upper edge of the band: the centre plus half the bandwidth.
    #[must_use]
    pub fn upper_hz(&self) -> f64 {
        self.center_hz + self.half_width()
    }

    /// Reports whether `freq_hz` lies inside the band, edges included.
    ///
    /// A NaN frequency is never contained, because every comparison against
    /// NaN is false.
    #[must_use]
    pub fn contains(&self, freq_hz: f64) -> bool {
        (self.lower_hz()..=self.upper_hz()).contains(&freq_hz)
    }

    /// Distance from the centre to either edge, i.e. half the bandwidth.
    fn half_width(&self) -> f64 {
        self.bandwidth / 2.0
    }
}
51
52/// Configuration for the frequency-domain alignment algorithm.
53#[derive(Debug, Clone)]
54pub struct FrequencyAlignConfig {
55    /// Frequency bands to analyse.
56    pub bands: Vec<FrequencyBand>,
57    /// Sample rate of the input signal in Hz.
58    pub sample_rate: u32,
59    /// Maximum search window size in samples.
60    pub max_shift_samples: usize,
61    /// Minimum cross-correlation confidence to accept a shift (0.0–1.0).
62    pub min_confidence: f64,
63}
64
65impl FrequencyAlignConfig {
66    /// Creates a config with sensible defaults and the given bands.
67    #[must_use]
68    pub fn new(bands: Vec<FrequencyBand>, sample_rate: u32) -> Self {
69        Self {
70            bands,
71            sample_rate,
72            max_shift_samples: 4800,
73            min_confidence: 0.6,
74        }
75    }
76
77    /// Returns the number of frequency bands configured.
78    #[must_use]
79    pub fn band_count(&self) -> usize {
80        self.bands.len()
81    }
82
83    /// Returns the maximum search window in milliseconds.
84    #[allow(clippy::cast_precision_loss)]
85    #[must_use]
86    pub fn max_shift_ms(&self) -> f64 {
87        (self.max_shift_samples as f64 / f64::from(self.sample_rate)) * 1000.0
88    }
89}
90
/// Outcome of a frequency-domain alignment run.
#[derive(Debug, Clone, Copy)]
pub struct FrequencyAlignResult {
    /// Best shift found (in samples; negative means B leads A).
    pub shift_samples: i64,
    /// Confidence score for this shift (0.0–1.0).
    pub confidence: f64,
    /// Index of the band that yielded the highest correlation.
    pub best_band_index: usize,
}

impl FrequencyAlignResult {
    /// Expresses the shift in milliseconds for a signal sampled at
    /// `sample_rate` Hz.
    ///
    /// The sign of the shift is preserved. A `sample_rate` of zero yields a
    /// non-finite value rather than a panic.
    #[allow(clippy::cast_precision_loss)]
    #[must_use]
    pub fn shift_ms(&self, sample_rate: u32) -> f64 {
        let shift_seconds = self.shift_samples as f64 / f64::from(sample_rate);
        shift_seconds * 1000.0
    }
}
110
111/// Aligns two signals in the frequency domain.
112#[derive(Debug)]
113pub struct FrequencyAligner {
114    config: FrequencyAlignConfig,
115}
116
117impl FrequencyAligner {
118    /// Creates a new aligner with the given configuration.
119    #[must_use]
120    pub fn new(config: FrequencyAlignConfig) -> Self {
121        Self { config }
122    }
123
124    /// Returns a reference to the current configuration.
125    #[must_use]
126    pub fn config(&self) -> &FrequencyAlignConfig {
127        &self.config
128    }
129
130    /// Computes the best temporal shift between `signal_a` and `signal_b`.
131    ///
132    /// Uses a simple time-domain cross-correlation per band (a stand-in for a
133    /// full FFT-based approach that would require an external library).
134    ///
135    /// Returns `None` when confidence is below the configured threshold or
136    /// the signals are too short.
137    #[allow(clippy::cast_precision_loss)]
138    #[must_use]
139    pub fn compute_shift(
140        &self,
141        signal_a: &[f32],
142        signal_b: &[f32],
143    ) -> Option<FrequencyAlignResult> {
144        if signal_a.is_empty() || signal_b.is_empty() {
145            return None;
146        }
147        let max_shift = self
148            .config
149            .max_shift_samples
150            .min(signal_a.len().min(signal_b.len()) / 2);
151        let mut best_shift = 0i64;
152        let mut best_corr: f64 = -1.0;
153        let search_len = signal_a.len().min(signal_b.len());
154
155        for lag in 0..=max_shift as i64 {
156            for sign in [1i64, -1i64] {
157                let shift = lag * sign;
158                let corr = Self::cross_corr(signal_a, signal_b, shift, search_len);
159                if corr > best_corr {
160                    best_corr = corr;
161                    best_shift = shift;
162                }
163            }
164        }
165
166        // Normalise to confidence ∈ [0, 1]
167        let confidence = best_corr.clamp(0.0, 1.0);
168        if confidence < self.config.min_confidence {
169            return None;
170        }
171        Some(FrequencyAlignResult {
172            shift_samples: best_shift,
173            confidence,
174            best_band_index: 0,
175        })
176    }
177
178    /// Applies `shift_samples` to `signal` by padding or trimming.
179    ///
180    /// A positive shift means inserting silence at the start; negative means
181    /// removing samples from the start.
182    #[must_use]
183    pub fn apply_shift(signal: &[f32], shift_samples: i64) -> Vec<f32> {
184        if shift_samples == 0 {
185            return signal.to_vec();
186        }
187        if shift_samples > 0 {
188            let pad = vec![0.0f32; shift_samples as usize];
189            let mut out = pad;
190            out.extend_from_slice(signal);
191            out
192        } else {
193            let skip = (-shift_samples) as usize;
194            if skip >= signal.len() {
195                vec![]
196            } else {
197                signal[skip..].to_vec()
198            }
199        }
200    }
201
202    /// Simple normalised cross-correlation at a given lag.
203    #[allow(clippy::cast_precision_loss)]
204    fn cross_corr(a: &[f32], b: &[f32], lag: i64, len: usize) -> f64 {
205        let mut sum = 0.0f64;
206        let mut norm_a = 0.0f64;
207        let mut norm_b = 0.0f64;
208        for i in 0..len {
209            let j = i as i64 + lag;
210            if j < 0 || j as usize >= b.len() {
211                continue;
212            }
213            let av = f64::from(a[i]);
214            let bv = f64::from(b[j as usize]);
215            sum += av * bv;
216            norm_a += av * av;
217            norm_b += bv * bv;
218        }
219        let denom = (norm_a * norm_b).sqrt();
220        if denom == 0.0 {
221            0.0
222        } else {
223            sum / denom
224        }
225    }
226}
227
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample rate used by every fixture, in Hz.
    const SAMPLE_RATE: u32 = 48_000;

    /// Three-band configuration at 48 kHz with the default limits.
    fn default_config() -> FrequencyAlignConfig {
        let bands = [(100.0, 50.0), (1000.0, 200.0), (8000.0, 1000.0)]
            .iter()
            .map(|&(center_hz, bandwidth)| FrequencyBand::new(center_hz, bandwidth))
            .collect();
        FrequencyAlignConfig::new(bands, SAMPLE_RATE)
    }

    /// Aligner built from [`default_config`].
    fn default_aligner() -> FrequencyAligner {
        FrequencyAligner::new(default_config())
    }

    /// The 1 kHz band, 200 Hz wide, shared by the `FrequencyBand` tests.
    fn mid_band() -> FrequencyBand {
        FrequencyBand::new(1000.0, 200.0)
    }

    /// `true` when `actual` differs from `expected` by less than `tolerance`.
    fn close(actual: f64, expected: f64, tolerance: f64) -> bool {
        (actual - expected).abs() < tolerance
    }

    #[test]
    fn test_frequency_band_bandwidth_hz() {
        assert!(close(mid_band().bandwidth_hz(), 200.0, f64::EPSILON));
    }

    #[test]
    fn test_frequency_band_edges() {
        let band = mid_band();
        assert!(close(band.lower_hz(), 900.0, f64::EPSILON));
        assert!(close(band.upper_hz(), 1100.0, f64::EPSILON));
    }

    #[test]
    fn test_frequency_band_contains() {
        let band = mid_band();
        // Centre and both edges are inside the band.
        for inside in [1000.0, 900.0, 1100.0].iter().copied() {
            assert!(band.contains(inside));
        }
        // Frequencies beyond either edge are outside.
        for outside in [850.0, 1150.0].iter().copied() {
            assert!(!band.contains(outside));
        }
    }

    #[test]
    fn test_config_band_count() {
        assert_eq!(default_config().band_count(), 3);
    }

    #[test]
    fn test_config_max_shift_ms() {
        // 4800 samples / 48000 Hz * 1000 = 100 ms
        assert!(close(default_config().max_shift_ms(), 100.0, 1e-9));
    }

    #[test]
    fn test_aligner_compute_shift_identical_signals() {
        let aligner = default_aligner();
        // A signal compared with itself must align at zero shift.
        let signal: Vec<f32> = (0..4800).map(|i| (i as f32 * 0.01).sin()).collect();
        let result = aligner.compute_shift(&signal, &signal);
        assert!(result.is_some());
        let r = result.expect("r should be valid");
        assert_eq!(r.shift_samples, 0);
        assert!(r.confidence > 0.9);
    }

    #[test]
    fn test_aligner_compute_shift_empty_signal() {
        let result = default_aligner().compute_shift(&[], &[1.0, 2.0]);
        assert!(result.is_none());
    }

    #[test]
    fn test_apply_shift_zero() {
        let signal = vec![1.0f32, 2.0, 3.0];
        let shifted = FrequencyAligner::apply_shift(&signal, 0);
        assert_eq!(shifted, vec![1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_apply_shift_positive() {
        let signal = vec![1.0f32, 2.0, 3.0];
        let shifted = FrequencyAligner::apply_shift(&signal, 2);
        assert_eq!(shifted, vec![0.0, 0.0, 1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_apply_shift_negative() {
        let signal = vec![1.0f32, 2.0, 3.0, 4.0];
        let shifted = FrequencyAligner::apply_shift(&signal, -2);
        assert_eq!(shifted, vec![3.0, 4.0]);
    }

    #[test]
    fn test_apply_shift_negative_exceeds_length() {
        let signal = vec![1.0f32, 2.0];
        let shifted = FrequencyAligner::apply_shift(&signal, -5);
        assert!(shifted.is_empty());
    }

    #[test]
    fn test_result_shift_ms() {
        let result = FrequencyAlignResult {
            shift_samples: 480,
            confidence: 0.9,
            best_band_index: 1,
        };
        // 480 samples / 48000 Hz * 1000 = 10 ms
        assert!(close(result.shift_ms(SAMPLE_RATE), 10.0, 1e-9));
    }

    #[test]
    fn test_aligner_config_accessor() {
        assert_eq!(default_aligner().config().band_count(), 3);
    }
}
342}