oximedia_timecode/
ltc_simd.rs

//! SIMD-accelerated Manchester (Biphase Mark Code) encoding and decoding for LTC.
//!
//! # LTC Audio Format Specification
//!
//! Linear Timecode encodes SMPTE timecode as an audio signal using **Biphase Mark
//! Code (BMC)**, also known as bi-phase mark modulation:
//!
//! ## Baud Rate
//! - At **30 fps**: 30 frames/s × 80 bits/frame = **2400 baud** (2400 bit-cells/second)
//! - At **25 fps**: 25 × 80 = **2000 baud**
//! - At **24 fps**: 24 × 80 = **1920 baud**
//!
//! ## Modulation (Biphase Mark Code)
//! Each bit occupies one bit-cell. Transitions (polarity reversals) occur as follows:
//! - **Bit 0**: One transition — only at the **beginning** of the bit cell.
//! - **Bit 1**: Two transitions — at the **beginning** *and* in the **middle** of the cell.
//!
//! Consequently:
//! - Logic 0 produces a square wave at the **bit-clock frequency** (one edge/cell).
//! - Logic 1 produces a square wave at **twice** the bit-clock frequency (two edges/cell).
//!
//! ## Sync Word
//! The 80-bit LTC frame ends with a fixed 16-bit sync word `0x3FFD`
//! (`1011_1111_1111_1100` in LSB-first order), which can never appear in the
//! preceding 64 data bits due to BMC encoding constraints.
//!
//! ## Signal Levels
//! Typically recorded at line level (−10 dBV to +4 dBu). The signal is polarity-
//! independent and can be read forwards *or* backwards at speeds from ~0.1× to ~10×.
//!
//! ## Manchester vs. BMC
//! Strictly speaking LTC uses **Biphase Mark** (transition at every cell boundary,
//! plus an extra mid-cell transition for logic 1), not classical Manchester code
//! (which uses mid-cell transitions only). The two schemes produce similar output
//! waveforms and are often conflated in practice. This module encodes/decodes both
//! the "LTC biphase mark" variant (used for audio output) and a "classical Manchester"
//! variant (used internally for testing and reference).

/// Appends the two half-cell samples of one Biphase Mark bit cell to `out`.
///
/// `level` holds the signal level at the end of the previous cell and is left
/// at the level this cell ends on, so the next call always produces a real
/// transition at the cell boundary — including after a `1` bit, whose
/// mid-cell flip must be carried forward.
fn push_bmc_cell(out: &mut Vec<f32>, level: &mut f32, is_one: bool) {
    // Mandatory transition at the start of every cell.
    *level = -*level;
    out.push(*level);
    // Extra mid-cell transition marks a logic 1; a logic 0 holds the level.
    if is_one {
        *level = -*level;
    }
    out.push(*level);
}

/// Decodes one bit cell from its two half-cell samples.
///
/// Returns `0` when both samples are quieter than `threshold`; otherwise
/// returns `1` if exactly one of the samples is strictly positive (a mid-cell
/// sign change) and `0` if not.
fn decode_cell(first: f32, second: f32, threshold: f32) -> u8 {
    let silent = first.abs() < threshold && second.abs() < threshold;
    let mid_transition = (first > 0.0) != (second > 0.0);
    u8::from(!silent && mid_transition)
}

/// Scalar Manchester / Biphase Mark Code encoder (reference implementation).
///
/// Each input bit is expanded into 2 output samples, one per half-cell.
/// With `p` being the level the previous cell ended on (`+amplitude` before
/// the first cell):
/// - `0` → `[-p, -p]` (transition at the cell start only)
/// - non-zero → `[-p, +p]` (transition at the cell start *and* at mid-cell)
///
/// The level therefore flips at every cell boundary, as Biphase Mark Code
/// requires, and the first emitted sample is always `-amplitude`.
///
/// For a full LTC signal use [`crate::ltc::encoder::LtcEncoder`]; this function is the
/// reference implementation used to verify the SIMD path.
///
/// # Arguments
/// * `bits`      – raw bits to encode, one per byte (any non-zero byte counts as `1`)
/// * `amplitude` – peak amplitude of the output signal (0.0–1.0)
///
/// # Returns
/// A `Vec<f32>` with `2 * bits.len()` samples.
#[must_use]
pub fn manchester_encode_scalar(bits: &[u8], amplitude: f32) -> Vec<f32> {
    let mut out = Vec::with_capacity(bits.len() * 2);
    let mut level = amplitude;
    for &bit in bits {
        push_bmc_cell(&mut out, &mut level, bit != 0);
    }
    out
}

/// Scalar Manchester / BMC decoder.
///
/// Reconstructs the original bit sequence from the half-cell sample pairs
/// produced by [`manchester_encode_scalar`].  A mid-cell transition (sign
/// change between sample 0 and sample 1 of a pair) indicates a `1` bit;
/// no mid-cell transition indicates `0`.  A pair in which *both* samples are
/// below `threshold` in magnitude is treated as `0`.
///
/// # Arguments
/// * `samples`    – the encoded samples (must have an even length)
/// * `threshold`  – minimum absolute value to consider a sample "active"
///
/// # Returns
/// `Some(Vec<u8>)` with `samples.len() / 2` bytes (each 0 or 1), or `None`
/// if the sample slice has an odd length.
#[must_use]
pub fn manchester_decode_scalar(samples: &[f32], threshold: f32) -> Option<Vec<u8>> {
    if samples.len() % 2 != 0 {
        return None;
    }
    Some(
        samples
            .chunks_exact(2)
            .map(|cell| decode_cell(cell[0], cell[1], threshold))
            .collect(),
    )
}

// ---------------------------------------------------------------------------
// x86_64 AVX2 SIMD path
// ---------------------------------------------------------------------------

/// SIMD-accelerated Manchester / BMC encoder (x86_64 AVX2).
///
/// Produces exactly the same samples as [`manchester_encode_scalar`].  Falls
/// back to that function on non-x86_64 targets or when AVX2 is not available
/// at runtime.
///
/// # Safety Notes
/// The unsafe inner function is guarded by a runtime CPUID check via
/// `is_x86_feature_detected!("avx2")` before it is ever called.
#[must_use]
#[allow(unsafe_code)]
pub fn manchester_encode_simd(bits: &[u8], amplitude: f32) -> Vec<f32> {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: We have verified AVX2 is available via runtime detection.
            return unsafe { manchester_encode_avx2(bits, amplitude) };
        }
    }
    manchester_encode_scalar(bits, amplitude)
}

/// SIMD-accelerated Manchester / BMC decoder (x86_64 AVX2).
///
/// Produces exactly the same result as [`manchester_decode_scalar`] and falls
/// back to it when AVX2 is unavailable.
#[must_use]
#[allow(unsafe_code)]
pub fn manchester_decode_simd(samples: &[f32], threshold: f32) -> Option<Vec<u8>> {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: We have verified AVX2 is available via runtime detection.
            return unsafe { manchester_decode_avx2(samples, threshold) };
        }
    }
    manchester_decode_scalar(samples, threshold)
}

// ---------------------------------------------------------------------------
// AVX2 implementation (x86_64 only)
// ---------------------------------------------------------------------------

/// AVX2 Manchester / BMC encoder.
///
/// Classifies 32 input bytes per iteration as zero / non-zero with a single
/// 256-bit compare + movemask, then emits the samples cell by cell (the
/// waveform depends on the running level, so emission stays sequential).
/// A scalar tail handles the final `bits.len() % 32` bytes.
///
/// # Safety
/// Caller must guarantee that the CPU supports AVX2
/// (verified by `is_x86_feature_detected!("avx2")`).
#[cfg(target_arch = "x86_64")]
#[allow(unsafe_code)]
#[target_feature(enable = "avx2")]
unsafe fn manchester_encode_avx2(bits: &[u8], amplitude: f32) -> Vec<f32> {
    use std::arch::x86_64::*;

    let mut out = Vec::with_capacity(bits.len() * 2);
    let mut level = amplitude;

    let mut blocks = bits.chunks_exact(32);
    for block in blocks.by_ref() {
        // SAFETY: `block` is exactly 32 readable bytes and the load is unaligned.
        let raw = unsafe { _mm256_loadu_si256(block.as_ptr() as *const __m256i) };

        // 0xFF in every byte lane that equals zero, 0x00 otherwise.
        let zero_lanes = _mm256_cmpeq_epi8(raw, _mm256_setzero_si256());
        // Invert so that bit `i` is set when `block[i]` encodes a logic 1.
        let one_bits = !(_mm256_movemask_epi8(zero_lanes) as u32);

        for i in 0..32u32 {
            push_bmc_cell(&mut out, &mut level, (one_bits >> i) & 1 == 1);
        }
    }

    // Scalar tail for the bits that do not fill a whole 32-byte block.
    for &bit in blocks.remainder() {
        push_bmc_cell(&mut out, &mut level, bit != 0);
    }

    out
}

/// AVX2 Manchester / BMC decoder.
///
/// Decodes 8 bit cells (16 samples) per iteration: two 256-bit compares yield
/// a "strictly positive" mask and a "below threshold" mask for all 16
/// samples, and each cell's bit is derived from the two mask bits of its
/// sample pair.  The final (< 8-pair) tail is decoded with the scalar rule.
/// The result is identical to [`manchester_decode_scalar`].
///
/// # Returns
/// `None` if `samples` has an odd length, otherwise one byte (0 or 1) per pair.
///
/// # Safety
/// Caller must guarantee that the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[allow(unsafe_code)]
#[target_feature(enable = "avx2")]
unsafe fn manchester_decode_avx2(samples: &[f32], threshold: f32) -> Option<Vec<u8>> {
    use std::arch::x86_64::*;

    if samples.len() % 2 != 0 {
        return None;
    }

    let mut bits = Vec::with_capacity(samples.len() / 2);

    let threshold_v = _mm256_set1_ps(threshold);
    let zero_v = _mm256_setzero_ps();
    // Only the IEEE-754 sign bit set; and-not with it yields the absolute value.
    let sign_bit = _mm256_set1_ps(-0.0_f32);

    let mut blocks = samples.chunks_exact(16);
    for block in blocks.by_ref() {
        let (front, back) = block.split_at(8);
        // SAFETY: `front` and `back` each hold exactly 8 readable `f32`s and
        // the loads are unaligned.
        let (lo, hi) = unsafe {
            (
                _mm256_loadu_ps(front.as_ptr()),
                _mm256_loadu_ps(back.as_ptr()),
            )
        };

        // Bit `k` set ⇔ sample `k` of the block is strictly positive.
        let positive = (_mm256_movemask_ps(_mm256_cmp_ps(lo, zero_v, _CMP_GT_OQ)) as u32)
            | ((_mm256_movemask_ps(_mm256_cmp_ps(hi, zero_v, _CMP_GT_OQ)) as u32) << 8);

        // Bit `k` set ⇔ |sample k| < threshold (ordered compare: false for NaN).
        let abs_lo = _mm256_andnot_ps(sign_bit, lo);
        let abs_hi = _mm256_andnot_ps(sign_bit, hi);
        let quiet = (_mm256_movemask_ps(_mm256_cmp_ps(abs_lo, threshold_v, _CMP_LT_OQ)) as u32)
            | ((_mm256_movemask_ps(_mm256_cmp_ps(abs_hi, threshold_v, _CMP_LT_OQ)) as u32) << 8);

        // Cell `i` owns samples `2i` and `2i + 1`; shifting right by one lines
        // the second sample's flag up with the first, so only even bit
        // positions are meaningful below.
        let mid_transition = positive ^ (positive >> 1);
        let silent = quiet & (quiet >> 1);
        let ones = mid_transition & !silent;

        for cell in 0..8u32 {
            bits.push(((ones >> (2 * cell)) & 1) as u8);
        }
    }

    // Scalar tail: the remainder has an even length because both the input
    // length and the block size are even.
    for cell in blocks.remainder().chunks_exact(2) {
        bits.push(decode_cell(cell[0], cell[1], threshold));
    }

    Some(bits)
}
312
313// ---------------------------------------------------------------------------
314// Tests
315// ---------------------------------------------------------------------------
316
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `bits` through the scalar encoder and straight back through the
    /// scalar decoder, returning whatever the decoder recovered.
    fn scalar_roundtrip(bits: &[u8], amplitude: f32, threshold: f32) -> Vec<u8> {
        let encoded = manchester_encode_scalar(bits, amplitude);
        manchester_decode_scalar(&encoded, threshold).expect("decode ok")
    }

    /// A full 80-bit frame of zeros survives a scalar encode/decode round trip.
    #[test]
    fn test_scalar_roundtrip_all_zeros() {
        let bits = [0u8; 80];
        assert_eq!(scalar_roundtrip(&bits, 1.0, 0.1), bits);
    }

    /// A full 80-bit frame of ones survives a scalar encode/decode round trip.
    #[test]
    fn test_scalar_roundtrip_all_ones() {
        let bits = [1u8; 80];
        assert_eq!(scalar_roundtrip(&bits, 1.0, 0.1), bits);
    }

    /// An alternating 0/1 frame at half amplitude survives the round trip.
    #[test]
    fn test_scalar_roundtrip_alternating() {
        let bits: Vec<u8> = [0u8, 1].iter().copied().cycle().take(80).collect();
        assert_eq!(scalar_roundtrip(&bits, 0.5, 0.1), bits);
    }

    /// The encoder emits exactly two samples per input bit.
    #[test]
    fn test_scalar_sample_count() {
        let bits = [0u8, 1, 0, 1, 1, 0];
        assert_eq!(manchester_encode_scalar(&bits, 1.0).len(), 2 * bits.len());
    }

    /// An odd number of samples cannot be split into half-cell pairs.
    #[test]
    fn test_decode_odd_length_returns_none() {
        let samples = [0.5f32; 3];
        assert!(manchester_decode_scalar(&samples, 0.1).is_none());
    }

    /// The SIMD encoder must agree with the scalar reference sample for sample.
    #[test]
    fn test_simd_matches_scalar_encode() {
        let bits: Vec<u8> = (0..80).map(|i| ((i / 3) % 2) as u8).collect();
        let scalar_out = manchester_encode_scalar(&bits, 0.7);
        let simd_out = manchester_encode_simd(&bits, 0.7);

        assert_eq!(scalar_out.len(), simd_out.len());
        for (a, b) in scalar_out.iter().zip(&simd_out) {
            assert!(
                (a - b).abs() < 1e-6,
                "SIMD and scalar outputs differ: {a} vs {b}"
            );
        }
    }

    /// The SIMD decoder must agree with the scalar reference and recover the input.
    #[test]
    fn test_simd_matches_scalar_decode() {
        let bits: Vec<u8> = (0..80).map(|i| u8::from(i % 3 == 0)).collect();
        let samples = manchester_encode_scalar(&bits, 0.9);

        let scalar_dec = manchester_decode_scalar(&samples, 0.1).expect("decode ok");
        let simd_dec = manchester_decode_simd(&samples, 0.1).expect("decode ok");

        assert_eq!(scalar_dec, simd_dec);
        assert_eq!(scalar_dec, bits);
    }

    /// SIMD encode followed by SIMD decode returns the original bit pattern.
    #[test]
    fn test_simd_roundtrip() {
        const MIXED: [u8; 16] = [0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0];
        // LTC sync word pattern (16 bits, LSB-first of 0x3FFD)
        const SYNC_WORD: [u8; 16] = [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0];

        let patterns: [&[u8]; 2] = [&MIXED, &SYNC_WORD];
        for pattern in patterns.iter().copied() {
            let samples = manchester_encode_simd(pattern, 1.0);
            let decoded = manchester_decode_simd(&samples, 0.1).expect("decode ok");
            assert_eq!(
                decoded, pattern,
                "Round-trip failed for pattern {pattern:?}"
            );
        }
    }

    /// The scalar functions are available on every target; exercise them
    /// directly, independent of any SIMD dispatch.
    #[test]
    fn test_scalar_fallback_non_simd() {
        let bits = [1u8, 0, 1, 0, 1, 1, 0, 0];
        assert_eq!(scalar_roundtrip(&bits, 1.0, 0.05), bits);
    }
}
oximedia_timecode/ltc_simd.rs

oximedia_timecode/
ltc_simd.rs