oximedia_timecode/ltc_simd.rs
1//! SIMD-accelerated Manchester (Biphase Mark Code) encoding and decoding for LTC.
2//!
3//! # LTC Audio Format Specification
4//!
5//! Linear Timecode encodes SMPTE timecode as an audio signal using **Biphase Mark
6//! Code (BMC)**, also known as bi-phase mark modulation:
7//!
8//! ## Baud Rate
9//! - At **30 fps**: 30 frames/s × 80 bits/frame = **2400 baud** (2400 bit-cells/second)
10//! - At **25 fps**: 25 × 80 = **2000 baud**
11//! - At **24 fps**: 24 × 80 = **1920 baud**
12//!
13//! ## Modulation (Biphase Mark Code)
14//! Each bit occupies one bit-cell. Transitions (polarity reversals) occur as follows:
15//! - **Bit 0**: One transition — only at the **beginning** of the bit cell.
16//! - **Bit 1**: Two transitions — at the **beginning** *and* in the **middle** of the cell.
17//!
18//! Consequently:
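//! - Logic 0 produces a square wave at **half** the bit-clock frequency (one edge/cell,
//!   e.g. 1200 Hz at 2400 baud).
//! - Logic 1 produces a square wave at the **bit-clock frequency** (two edges/cell,
//!   e.g. 2400 Hz at 2400 baud), i.e. twice the frequency of a run of zeros.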
21//!
22//! ## Sync Word
23//! The 80-bit LTC frame ends with a fixed 16-bit sync word `0x3FFD`
24//! (`1011_1111_1111_1100` in LSB-first order), which can never appear in the
25//! preceding 64 data bits due to BMC encoding constraints.
26//!
27//! ## Signal Levels
28//! Typically recorded at line level (−10 dBV to +4 dBu). The signal is polarity-
29//! independent and can be read forwards *or* backwards at speeds from ~0.1× to ~10×.
30//!
31//! ## Manchester vs. BMC
32//! Strictly speaking LTC uses **Biphase Mark** (transition at every cell boundary,
33//! plus an extra mid-cell transition for logic 1), not classical Manchester code
34//! (which uses mid-cell transitions only). The two schemes produce similar output
//! waveforms and are often conflated in practice. Despite their `manchester_*` names,
//! the functions in this module use the biphase mark cell layout: a bit's value is
//! carried by the presence (`1`) or absence (`0`) of a mid-cell transition.
38
/// Appends the two half-cell samples of one BMC bit-cell to `out`.
///
/// `level` is the signal level at the end of the previous cell. It is inverted
/// at the cell boundary (every cell) and inverted again at mid-cell when
/// `bit_is_one` is set, then left at the level the cell ends on so the next
/// cell continues from there.
#[inline]
fn push_bmc_cell(out: &mut Vec<f32>, level: &mut f32, bit_is_one: bool) {
    // Boundary transition: present for every bit.
    *level = -*level;
    out.push(*level);
    // Mid-cell transition: present only for logic 1.
    if bit_is_one {
        *level = -*level;
    }
    out.push(*level);
}

/// Classifies one pair of half-cell samples as bit `0` or `1`.
///
/// Returns `0` when both samples are below `threshold` in magnitude.
/// Otherwise returns `1` if exactly one of the two samples is strictly
/// positive (a mid-cell sign change) and `0` if not.
#[inline]
fn bmc_decode_pair(first: f32, second: f32, threshold: f32) -> u8 {
    let quiet = first.abs() < threshold && second.abs() < threshold;
    let mid_edge = (first > 0.0) != (second > 0.0);
    u8::from(!quiet && mid_edge)
}

/// Scalar Manchester / Biphase Mark Code encoder.
///
/// Each input bit is expanded into 2 output samples (one per half-cell). The
/// signal level is inverted at the start of **every** cell and, for a non-zero
/// bit, inverted once more at mid-cell:
/// - `0` → `[level, level]` (boundary transition only)
/// - non-zero → `[level, -level]` (boundary and mid-cell transitions)
///
/// The level is carried from the end of one cell into the next, so two
/// adjacent cells never share a level across their boundary. The first sample
/// emitted is always `-amplitude`.
///
/// For a full LTC signal use [`crate::ltc::encoder::LtcEncoder`]; this function is the
/// reference implementation used to verify the SIMD path.
///
/// # Arguments
/// * `bits` – raw bits to encode, one per byte (any non-zero byte is treated as 1)
/// * `amplitude` – peak amplitude of the output signal (0.0–1.0)
///
/// # Returns
/// A `Vec<f32>` with `2 * bits.len()` samples.
#[must_use]
pub fn manchester_encode_scalar(bits: &[u8], amplitude: f32) -> Vec<f32> {
    let mut out = Vec::with_capacity(bits.len() * 2);
    // Level "before" the first cell; the first boundary transition flips it.
    let mut level = amplitude;
    for &b in bits {
        push_bmc_cell(&mut out, &mut level, b != 0);
    }
    out
}

/// Scalar Manchester / BMC decoder.
///
/// Reconstructs the original bit sequence from the half-cell sample pairs
/// produced by [`manchester_encode_scalar`]. A mid-cell transition (exactly
/// one sample of the pair is strictly positive) indicates a `1` bit; no
/// mid-cell transition indicates `0`. A pair whose samples are both below
/// `threshold` in magnitude is decoded as `0`.
///
/// # Arguments
/// * `samples` – the encoded samples (must have an even length)
/// * `threshold` – minimum absolute value to consider a sample "active"
///
/// # Returns
/// `Some(Vec<u8>)` with `samples.len() / 2` bytes (each 0 or 1), or `None`
/// if the sample slice has an odd length.
#[must_use]
pub fn manchester_decode_scalar(samples: &[f32], threshold: f32) -> Option<Vec<u8>> {
    if samples.len() % 2 != 0 {
        return None;
    }
    Some(
        samples
            .chunks_exact(2)
            .map(|pair| bmc_decode_pair(pair[0], pair[1], threshold))
            .collect(),
    )
}

// ---------------------------------------------------------------------------
// x86_64 AVX2 SIMD path
// ---------------------------------------------------------------------------

/// SIMD-accelerated Manchester / BMC encoder (x86_64 AVX2).
///
/// Processes 32 bits at a time using 256-bit AVX2 registers. Falls back to
/// [`manchester_encode_scalar`] on non-x86_64 targets or when AVX2 is not
/// available at runtime. Both paths produce the same samples.
///
/// # Safety Notes
/// The unsafe inner function is guarded by a runtime CPUID check via
/// `is_x86_feature_detected!("avx2")` before it is ever called.
#[must_use]
#[allow(unsafe_code)]
pub fn manchester_encode_simd(bits: &[u8], amplitude: f32) -> Vec<f32> {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: We have verified AVX2 is available via runtime detection.
            return unsafe { manchester_encode_avx2(bits, amplitude) };
        }
    }
    manchester_encode_scalar(bits, amplitude)
}

/// SIMD-accelerated Manchester / BMC decoder (x86_64 AVX2).
///
/// Falls back to [`manchester_decode_scalar`] on non-x86_64 targets or when
/// AVX2 is unavailable at runtime. Returns `None` for an odd-length input,
/// exactly like the scalar decoder.
#[must_use]
#[allow(unsafe_code)]
pub fn manchester_decode_simd(samples: &[f32], threshold: f32) -> Option<Vec<u8>> {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: We have verified AVX2 is available via runtime detection.
            return unsafe { manchester_decode_avx2(samples, threshold) };
        }
    }
    manchester_decode_scalar(samples, threshold)
}

// ---------------------------------------------------------------------------
// AVX2 implementation (x86_64 only)
// ---------------------------------------------------------------------------

/// AVX2 Manchester encoder.
///
/// Classifies 32 input bytes per loop iteration with one 256-bit compare and
/// a movemask, then emits the samples for those 32 cells. Emission stays
/// sequential because each cell's level depends on every bit before it.
/// A scalar tail handles any remainder of fewer than 32 bits.
///
/// # Safety
/// Caller must guarantee that the CPU supports AVX2
/// (verified by `is_x86_feature_detected!("avx2")`).
#[cfg(target_arch = "x86_64")]
#[allow(unsafe_code)]
#[target_feature(enable = "avx2")]
unsafe fn manchester_encode_avx2(bits: &[u8], amplitude: f32) -> Vec<f32> {
    use std::arch::x86_64::*;

    let mut out = Vec::with_capacity(bits.len() * 2);
    // Level "before" the first cell; the first boundary transition flips it.
    let mut level = amplitude;
    let zeros = _mm256_setzero_si256();

    let mut blocks = bits.chunks_exact(32);
    for block in &mut blocks {
        // SAFETY: `chunks_exact(32)` yields exactly 32 readable bytes, and
        // `_mm256_loadu_si256` has no alignment requirement.
        let raw = unsafe { _mm256_loadu_si256(block.as_ptr() as *const __m256i) };

        // Bit i of `zero_mask` is set when block[i] == 0; invert it to get
        // the "logic 1" cells.
        let zero_mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(raw, zeros)) as u32;
        let one_mask = !zero_mask;

        for i in 0..32u32 {
            push_bmc_cell(&mut out, &mut level, (one_mask >> i) & 1 == 1);
        }
    }

    // Scalar tail for the remaining (< 32) bits.
    for &b in blocks.remainder() {
        push_bmc_cell(&mut out, &mut level, b != 0);
    }

    out
}

/// AVX2 Manchester decoder.
///
/// Decodes 8 sample pairs (16 floats) per loop iteration: the first and
/// second half-cell samples are de-interleaved with two shuffles, compared
/// against zero and against `threshold` in vector form, and the per-pair
/// result is extracted with a movemask. The comparison predicates mirror the
/// scalar `<` / `>` tests in [`manchester_decode_scalar`]. A scalar loop
/// handles the last (< 8-pair) tail.
///
/// # Safety
/// Caller must guarantee that the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[allow(unsafe_code)]
#[target_feature(enable = "avx2")]
unsafe fn manchester_decode_avx2(samples: &[f32], threshold: f32) -> Option<Vec<u8>> {
    use std::arch::x86_64::*;

    // `_mm256_shuffle_ps` works per 128-bit lane, so the de-interleaved
    // vectors hold the pairs in lane order [0, 1, 4, 5, 2, 3, 6, 7].
    // Entry `j` is the vector lane that holds pair `j`.
    const LANE_OF_PAIR: [u32; 8] = [0, 1, 4, 5, 2, 3, 6, 7];

    if samples.len() % 2 != 0 {
        return None;
    }

    let mut bits = Vec::with_capacity(samples.len() / 2);

    let thr = _mm256_set1_ps(threshold);
    let zero = _mm256_setzero_ps();
    // Only the sign bit set; `andnot` with it clears the sign (absolute value).
    let sign_bit = _mm256_set1_ps(-0.0_f32);

    let mut blocks = samples.chunks_exact(16);
    for block in &mut blocks {
        // SAFETY: `chunks_exact(16)` yields exactly 16 readable floats, so
        // both 8-float unaligned loads stay inside `block`.
        let (lo, hi) = unsafe {
            (
                _mm256_loadu_ps(block.as_ptr()),
                _mm256_loadu_ps(block[8..].as_ptr()),
            )
        };

        // 0x88 selects elements 0 and 2 of each source lane (first half-cells),
        // 0xDD selects elements 1 and 3 (second half-cells).
        let first = _mm256_shuffle_ps(lo, hi, 0x88);
        let second = _mm256_shuffle_ps(lo, hi, 0xDD);

        // Mid-cell transition: exactly one of the two samples is > 0.
        let mid_edge = _mm256_xor_ps(
            _mm256_cmp_ps(first, zero, _CMP_GT_OQ),
            _mm256_cmp_ps(second, zero, _CMP_GT_OQ),
        );

        // Quiet pair: both magnitudes strictly below the threshold.
        let quiet = _mm256_and_ps(
            _mm256_cmp_ps(_mm256_andnot_ps(sign_bit, first), thr, _CMP_LT_OQ),
            _mm256_cmp_ps(_mm256_andnot_ps(sign_bit, second), thr, _CMP_LT_OQ),
        );

        // bit = !quiet && mid_edge; one mask bit per vector lane.
        let mask = _mm256_movemask_ps(_mm256_andnot_ps(quiet, mid_edge)) as u32;

        bits.extend(
            LANE_OF_PAIR
                .iter()
                .map(|&lane| ((mask >> lane) & 1) as u8),
        );
    }

    // Scalar tail for the remaining (< 8) pairs.
    bits.extend(
        blocks
            .remainder()
            .chunks_exact(2)
            .map(|pair| bmc_decode_pair(pair[0], pair[1], threshold)),
    );

    Some(bits)
}
312
313// ---------------------------------------------------------------------------
314// Tests
315// ---------------------------------------------------------------------------
316
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `bits` with the scalar encoder, decodes the samples with the
    /// scalar decoder and asserts that the original bits come back.
    fn assert_scalar_roundtrip(bits: &[u8], amplitude: f32, threshold: f32) {
        let encoded = manchester_encode_scalar(bits, amplitude);
        let recovered = manchester_decode_scalar(&encoded, threshold).expect("decode ok");
        assert_eq!(recovered, bits);
    }

    /// A full 80-bit frame of zeros survives an encode/decode round trip.
    #[test]
    fn test_scalar_roundtrip_all_zeros() {
        assert_scalar_roundtrip(&[0u8; 80], 1.0, 0.1);
    }

    /// A full 80-bit frame of ones survives an encode/decode round trip.
    #[test]
    fn test_scalar_roundtrip_all_ones() {
        assert_scalar_roundtrip(&[1u8; 80], 1.0, 0.1);
    }

    /// Alternating 0/1 bits at half amplitude survive a round trip.
    #[test]
    fn test_scalar_roundtrip_alternating() {
        let bits: Vec<u8> = (0..80).map(|i| (i % 2) as u8).collect();
        assert_scalar_roundtrip(&bits, 0.5, 0.1);
    }

    /// The encoder emits exactly two samples per input bit.
    #[test]
    fn test_scalar_sample_count() {
        let bits = [0u8, 1, 0, 1, 1, 0];
        let encoded = manchester_encode_scalar(&bits, 1.0);
        assert_eq!(encoded.len(), bits.len() * 2);
    }

    /// An odd number of samples cannot be split into pairs and is rejected.
    #[test]
    fn test_decode_odd_length_returns_none() {
        let odd = [0.5f32; 3];
        assert!(manchester_decode_scalar(&odd, 0.1).is_none());
    }

    /// Verify the SIMD path produces the same output as scalar.
    #[test]
    fn test_simd_matches_scalar_encode() {
        let bits: Vec<u8> = (0..80).map(|i| ((i / 3) % 2) as u8).collect();
        let reference = manchester_encode_scalar(&bits, 0.7);
        let accelerated = manchester_encode_simd(&bits, 0.7);
        assert_eq!(reference.len(), accelerated.len());
        for (a, b) in reference.iter().zip(accelerated.iter()) {
            assert!(
                (a - b).abs() < 1e-6,
                "SIMD and scalar outputs differ: {a} vs {b}"
            );
        }
    }

    /// Both decoders agree with each other and with the original bits.
    #[test]
    fn test_simd_matches_scalar_decode() {
        let bits: Vec<u8> = (0..80).map(|i| (i % 3 == 0) as u8).collect();
        let encoded = manchester_encode_scalar(&bits, 0.9);
        let reference = manchester_decode_scalar(&encoded, 0.1).expect("decode ok");
        let accelerated = manchester_decode_simd(&encoded, 0.1).expect("decode ok");
        assert_eq!(reference, accelerated);
        assert_eq!(reference, bits);
    }

    /// SIMD encode+decode round-trip with various bit patterns.
    #[test]
    fn test_simd_roundtrip() {
        // Arbitrary mixed pattern.
        let mixed: &[u8] = &[0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0];
        // LTC sync word pattern (16 bits, LSB-first of 0x3FFD).
        let sync_word: &[u8] = &[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0];

        for pattern in [mixed, sync_word] {
            let encoded = manchester_encode_simd(pattern, 1.0);
            let decoded = manchester_decode_simd(&encoded, 0.1).expect("decode ok");
            assert_eq!(
                decoded, pattern,
                "Round-trip failed for pattern {pattern:?}"
            );
        }
    }

    /// Non-AVX2 targets fall back to scalar; this test exercises that path explicitly.
    #[test]
    fn test_scalar_fallback_non_simd() {
        // The scalar functions are called directly — they are always available.
        assert_scalar_roundtrip(&[1, 0, 1, 0, 1, 1, 0, 0], 1.0, 0.05);
    }
}