1use std::sync::OnceLock;
7
/// Which SIMD instruction-set extensions were detected at runtime.
///
/// Only the fields for the compiled target architecture can be `true`:
/// `sse2`/`avx2` on x86_64, `neon` on aarch64; everything else is `false`
/// (see `detect_simd_support`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SimdSupport {
    /// x86_64 SSE2 detected at runtime.
    pub sse2: bool,
    /// x86_64 AVX2 detected at runtime.
    pub avx2: bool,
    /// AArch64 NEON detected at runtime.
    pub neon: bool,
}
18
19static SIMD_SUPPORT: OnceLock<SimdSupport> = OnceLock::new();
21
22pub fn init_simd_support() {
24 SIMD_SUPPORT.get_or_init(|| detect_simd_support());
25}
26
/// Probe the current CPU for the SIMD extensions this module cares about.
///
/// Exactly one of the three `cfg`-gated blocks below is compiled for any
/// given target, so the body is a single tail expression on every
/// architecture. Unsupported architectures report no SIMD at all.
fn detect_simd_support() -> SimdSupport {
    #[cfg(target_arch = "x86_64")]
    {
        SimdSupport {
            // Runtime (not compile-time) feature detection.
            sse2: is_x86_feature_detected!("sse2"),
            avx2: is_x86_feature_detected!("avx2"),
            neon: false,
        }
    }
    #[cfg(target_arch = "aarch64")]
    {
        SimdSupport {
            sse2: false,
            avx2: false,
            neon: std::arch::is_aarch64_feature_detected!("neon"),
        }
    }
    // Fallback for every other architecture: nothing to detect.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    {
        SimdSupport {
            sse2: false,
            avx2: false,
            neon: false,
        }
    }
}
54
55pub fn get_simd_support() -> SimdSupport {
57 *SIMD_SUPPORT.get_or_init(|| detect_simd_support())
58}
59
60pub fn has_simd_support() -> bool {
62 let support = get_simd_support();
63 support.sse2 || support.avx2 || support.neon
64}
65
66#[cfg(target_arch = "x86_64")]
68pub fn encode_mulaw_simd_sse2(samples: &[i16], output: &mut [u8]) {
69 use std::arch::x86_64::*;
70
71 if !get_simd_support().sse2 {
72 return encode_mulaw_scalar(samples, output);
73 }
74
75 let mut chunks = samples.chunks_exact(8);
76 let mut out_idx = 0;
77
78 unsafe {
79 for chunk in chunks.by_ref() {
80 let samples_vec = _mm_loadu_si128(chunk.as_ptr() as *const __m128i);
82
83 output[out_idx] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 0) as i16);
86 output[out_idx + 1] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 1) as i16);
87 output[out_idx + 2] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 2) as i16);
88 output[out_idx + 3] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 3) as i16);
89 output[out_idx + 4] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 4) as i16);
90 output[out_idx + 5] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 5) as i16);
91 output[out_idx + 6] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 6) as i16);
92 output[out_idx + 7] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 7) as i16);
93 out_idx += 8;
94 }
95 }
96
97 for &sample in chunks.remainder() {
99 output[out_idx] = linear_to_mulaw_scalar(sample);
100 out_idx += 1;
101 }
102}
103
/// Encode 16-bit PCM samples to mu-law bytes (aarch64 entry point).
///
/// FIX(review): both branches of the old `if !neon` guard called
/// `encode_mulaw_scalar`, so the branch was dead weight — no NEON kernel has
/// been written yet. Delegate directly to the scalar path; behavior is
/// unchanged.
///
/// Panics if `output` is shorter than `samples`, same as before.
#[cfg(target_arch = "aarch64")]
pub fn encode_mulaw_simd_neon(samples: &[i16], output: &mut [u8]) {
    // Keep the detection call so the OnceLock cache is still initialized
    // by this entry point, as before.
    let _ = get_simd_support();
    encode_mulaw_scalar(samples, output);
}
114
115pub fn encode_mulaw_scalar(samples: &[i16], output: &mut [u8]) {
117 for (i, &sample) in samples.iter().enumerate() {
118 output[i] = linear_to_mulaw_scalar(sample);
119 }
120}
121
122#[cfg(target_arch = "x86_64")]
124pub fn encode_alaw_simd_sse2(samples: &[i16], output: &mut [u8]) {
125 use std::arch::x86_64::*;
126
127 if !get_simd_support().sse2 {
128 return encode_alaw_scalar(samples, output);
129 }
130
131 let mut chunks = samples.chunks_exact(8);
132 let mut out_idx = 0;
133
134 unsafe {
135 for chunk in chunks.by_ref() {
136 let samples_vec = _mm_loadu_si128(chunk.as_ptr() as *const __m128i);
138
139 output[out_idx] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 0) as i16);
142 output[out_idx + 1] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 1) as i16);
143 output[out_idx + 2] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 2) as i16);
144 output[out_idx + 3] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 3) as i16);
145 output[out_idx + 4] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 4) as i16);
146 output[out_idx + 5] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 5) as i16);
147 output[out_idx + 6] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 6) as i16);
148 output[out_idx + 7] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 7) as i16);
149 out_idx += 8;
150 }
151 }
152
153 for &sample in chunks.remainder() {
155 output[out_idx] = linear_to_alaw_scalar(sample);
156 out_idx += 1;
157 }
158}
159
/// Encode 16-bit PCM samples to A-law bytes (aarch64 entry point).
///
/// FIX(review): both branches of the old `if !neon` guard called
/// `encode_alaw_scalar`, so the branch was dead weight — no NEON kernel has
/// been written yet. Delegate directly to the scalar path; behavior is
/// unchanged.
///
/// Panics if `output` is shorter than `samples`, same as before.
#[cfg(target_arch = "aarch64")]
pub fn encode_alaw_simd_neon(samples: &[i16], output: &mut [u8]) {
    // Keep the detection call so the OnceLock cache is still initialized
    // by this entry point, as before.
    let _ = get_simd_support();
    encode_alaw_scalar(samples, output);
}
170
171pub fn encode_alaw_scalar(samples: &[i16], output: &mut [u8]) {
173 for (i, &sample) in samples.iter().enumerate() {
174 output[i] = linear_to_alaw_scalar(sample);
175 }
176}
177
/// Encode PCM samples to mu-law, dispatching to the best available backend.
///
/// Dispatch order: SSE2 on x86_64, NEON on aarch64, then the scalar loop.
/// All backends produce identical bytes; `output` must be at least as long
/// as `samples` or the chosen encoder panics on an out-of-bounds index.
pub fn encode_mulaw_optimized(samples: &[i16], output: &mut [u8]) {
    #[cfg(target_arch = "x86_64")]
    {
        if get_simd_support().sse2 {
            return encode_mulaw_simd_sse2(samples, output);
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if get_simd_support().neon {
            return encode_mulaw_simd_neon(samples, output);
        }
    }

    // No SIMD detected (or a non-SIMD architecture): plain scalar loop.
    encode_mulaw_scalar(samples, output);
}
196
/// Encode PCM samples to A-law, dispatching to the best available backend.
///
/// Dispatch order: SSE2 on x86_64, NEON on aarch64, then the scalar loop.
/// All backends produce identical bytes; `output` must be at least as long
/// as `samples` or the chosen encoder panics on an out-of-bounds index.
pub fn encode_alaw_optimized(samples: &[i16], output: &mut [u8]) {
    #[cfg(target_arch = "x86_64")]
    {
        if get_simd_support().sse2 {
            return encode_alaw_simd_sse2(samples, output);
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if get_simd_support().neon {
            return encode_alaw_simd_neon(samples, output);
        }
    }

    // No SIMD detected (or a non-SIMD architecture): plain scalar loop.
    encode_alaw_scalar(samples, output);
}
215
/// Convert one signed 16-bit PCM sample to a mu-law byte.
///
/// NOTE(review): the segment thresholds below (0x1F, 0x3F, …, 0x7FF) differ
/// from ITU-T G.711 mu-law — presumably a project-specific variant. The
/// paired `mulaw_to_linear_scalar` in this file mirrors it, so the pair
/// round-trips; verify before relying on G.711 interoperability.
pub fn linear_to_mulaw_scalar(sample: i16) -> u8 {
    const CLIP: i16 = 32635;
    const BIAS: i16 = 0x84;
    const MULAW_MAX: u8 = 0x7F;

    // Split into sign bit and magnitude. `i16::MIN` has no positive
    // counterpart, so it saturates to `i16::MAX` (then gets clipped anyway).
    let sign: u8 = if sample < 0 { 0x80 } else { 0x00 };
    let magnitude = if sample < 0 {
        sample.checked_neg().unwrap_or(i16::MAX)
    } else {
        sample
    };

    // Clip, then add the bias so a step offset survives encoding.
    let biased = magnitude.min(CLIP) + BIAS;

    // Pick the segment (exponent) from the biased magnitude.
    let exponent: i16 = match biased {
        i16::MIN..=0x1F => 0,
        0x20..=0x3F => 1,
        0x40..=0x7F => 2,
        0x80..=0xFF => 3,
        0x100..=0x1FF => 4,
        0x200..=0x3FF => 5,
        0x400..=0x7FF => 6,
        _ => 7,
    };

    // 4-bit mantissa taken from just below the segment's top bits.
    let mantissa = (biased >> (exponent + 3)) & 0x0F;
    let byte = ((exponent << 4) | mantissa) as u8;

    // Invert the data bits and attach the sign, per mu-law convention.
    (byte ^ MULAW_MAX) | sign
}
264
/// Convert one signed 16-bit PCM sample to an A-law byte.
///
/// NOTE(review): the `+ 16` segment offset below is not ITU-T G.711 A-law —
/// presumably a project-specific variant mirrored by `alaw_to_linear_scalar`
/// in this file. Verify before relying on G.711 interoperability.
///
/// BUG FIX: for magnitudes >= 16384 (exponent 7) the old code computed
/// `((7 << 4) | mantissa) + 16`, which is 128..=143 — the value spilled into
/// bit 7, the sign bit, so large *positive* samples produced bytes that the
/// paired decoder read back as small *negative* values (e.g. 20000 decoded
/// to -56). The encoded magnitude is now clamped to 0x7F; inputs below
/// 16384 encode exactly as before.
pub fn linear_to_alaw_scalar(sample: i16) -> u8 {
    const CLIP: i16 = 32635;
    const ALAW_MAX: u8 = 0x7F;

    // Split into sign bit and magnitude; i16::MIN saturates to i16::MAX.
    let mut sample = sample;
    let sign = if sample < 0 {
        sample = if sample == i16::MIN {
            i16::MAX
        } else {
            -sample
        };
        0x80
    } else {
        0x00
    };

    if sample > CLIP {
        sample = CLIP;
    }

    let alaw = if sample < 256 {
        // Bottom segment: straight 4-bit quantization.
        sample >> 4
    } else {
        // Pick the segment (exponent) from the magnitude.
        let exponent = if sample < 512 {
            1
        } else if sample < 1024 {
            2
        } else if sample < 2048 {
            3
        } else if sample < 4096 {
            4
        } else if sample < 8192 {
            5
        } else if sample < 16384 {
            6
        } else {
            7
        };

        let mantissa = (sample >> (exponent + 3)) & 0x0F;
        // Clamp to 7 bits so exponent 7 cannot overflow into the sign bit.
        (((exponent << 4) | mantissa) + 16).min(0x7F)
    };

    // Invert the data bits and attach the sign.
    ((alaw as u8) ^ ALAW_MAX) | sign
}
312
/// Decode one mu-law byte back to a signed 16-bit PCM sample.
///
/// Inverse of `linear_to_mulaw_scalar` in this file (a non-standard pair —
/// see the encoder's note about its segment thresholds).
pub fn mulaw_to_linear_scalar(mulaw: u8) -> i16 {
    const BIAS: i16 = 0x84;
    const MULAW_MAX: u8 = 0x7F;

    // Undo the data-bit inversion, then split the fields.
    let inverted = mulaw ^ MULAW_MAX;
    let negative = inverted & 0x80 != 0;
    let exponent = (inverted >> 4) & 0x07;
    let mantissa = (inverted & 0x0F) as i16;

    // Rebuild the magnitude: mantissa shifted back into its segment, plus
    // the bias, plus a mid-step restoration for the non-zero segments.
    let restore = if exponent > 0 { 1i16 << (exponent + 2) } else { 0 };
    let magnitude = (mantissa << (exponent + 3)) + BIAS + restore;

    if negative { -magnitude } else { magnitude }
}
335
/// Decode one A-law byte back to a signed 16-bit PCM sample.
///
/// Inverse of `linear_to_alaw_scalar` in this file (a non-standard pair —
/// see the encoder's note about its segment offset).
pub fn alaw_to_linear_scalar(alaw: u8) -> i16 {
    const ALAW_MAX: u8 = 0x7F;

    // Undo the data-bit inversion, then split the fields.
    let decoded = alaw ^ ALAW_MAX;
    let negative = decoded & 0x80 != 0;
    let magnitude = decoded & 0x7F;

    let base: u16 = if magnitude < 16 {
        // Bottom segment: linear region, undo the 4-bit quantization.
        u16::from(magnitude) << 4
    } else {
        let exponent = u32::from((magnitude >> 4) & 0x07);
        let mantissa = u16::from(magnitude & 0x0F);

        // The `.min(15)` clamps mirror the original guard against shifting
        // a u16 by 16 or more (exponent here is at most 7, so the clamp is
        // never hit in practice).
        let exp_shift = (exponent + 3).min(15);
        let gain_shift = (exponent + 2).min(15);

        (mantissa << exp_shift) + (1u16 << gain_shift)
    };

    // Half-step offset applied to every segment.
    let sample = base + 8;

    if negative { -(sample as i16) } else { sample as i16 }
}
363
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: detection runs without panicking; prints the per-arch
    // result for manual inspection (no assertions by design).
    #[test]
    fn test_simd_support_detection() {
        init_simd_support();
        let support = get_simd_support();

        #[cfg(target_arch = "x86_64")]
        {
            println!("SSE2 support: {}", support.sse2);
        }

        #[cfg(target_arch = "aarch64")]
        {
            println!("NEON support: {}", support.neon);
        }
    }

    // Round-trip through the mu-law pair; the tolerance is loose because
    // the codec is lossy (segment quantization).
    #[test]
    fn test_mulaw_roundtrip() {
        let original = 12345i16;
        let encoded = linear_to_mulaw_scalar(original);
        let decoded = mulaw_to_linear_scalar(encoded);

        let error = (original - decoded).abs();
        assert!(error < 1000, "Error too large: {}", error);
    }

    // Round-trip through the A-law pair. NOTE(review): the 5000 tolerance is
    // very loose — this variant's error at 12345 is ~3600; tighten once the
    // codec's intended precision is confirmed.
    #[test]
    fn test_alaw_roundtrip() {
        let original = 12345i16;
        let encoded = linear_to_alaw_scalar(original);
        let decoded = alaw_to_linear_scalar(encoded);

        let error = (original - decoded).abs();
        assert!(error < 5000, "Error too large: {} (original: {}, decoded: {})", error, original, decoded);
    }

    // The optimized dispatcher must produce byte-identical output to the
    // scalar reference, whichever backend it selects on this machine.
    #[test]
    fn test_simd_vs_scalar() {
        let samples = vec![0, 1000, -1000, 16000, -16000, 32000, -32000, 12345];
        let mut simd_output = vec![0u8; samples.len()];
        let mut scalar_output = vec![0u8; samples.len()];

        encode_mulaw_optimized(&samples, &mut simd_output);
        encode_mulaw_scalar(&samples, &mut scalar_output);

        assert_eq!(simd_output, scalar_output);
    }

    // Empty input must be a no-op, not a panic.
    #[test]
    fn test_empty_input() {
        let samples: Vec<i16> = vec![];
        let mut output: Vec<u8> = vec![];

        encode_mulaw_optimized(&samples, &mut output);
        assert_eq!(output.len(), 0);
    }

    // Extremes (including i16::MIN, which has no positive counterpart)
    // must encode without overflow panics.
    #[test]
    fn test_edge_cases() {
        let samples = vec![i16::MAX, i16::MIN, 0];
        let mut output = vec![0u8; samples.len()];

        encode_mulaw_optimized(&samples, &mut output);

        assert_eq!(output.len(), samples.len());
    }
}