1#![cfg_attr(feature = "cargo-clippy", allow(clippy::cast_lossless))]
7#![cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
8
9use core::f64;
10
11#[cfg(feature = "alloc")]
12use alloc::vec::Vec;
13
14#[cfg(not(feature = "std"))]
15use libm::F64Ext;
16
17#[cfg(not(feature = "std"))]
18use super::math;
19use super::synth::Synth;
20
// FIR table resolution (number of filter phases) for SamplingMethod::ResampleFast.
const FIR_RES_FAST: i32 = 51473;
// FIR table resolution for SamplingMethod::Resample (phases are interpolated).
const FIR_RES_INTERPOLATE: i32 = 285;
// FIR coefficients are scaled by 2^FIR_SHIFT (see init_fir); convolution
// results are shifted back down by this amount.
const FIR_SHIFT: i32 = 15;
// Sample ring buffer length; a power of two so the write index can be
// wrapped with a mask (RING_SIZE - 1 == 0x3fff).
const RING_SIZE: usize = 16384;

// Cycle/sample offsets use 16.16 fixed-point arithmetic.
const FIXP_SHIFT: i32 = 16;
const FIXP_MASK: i32 = 0xffff;
35
/// Strategy used to convert clocked synth output into audio samples.
#[derive(Clone, Copy, PartialEq)]
pub enum SamplingMethod {
    /// Nearest-sample decimation via delta clocking; cheapest.
    Fast,
    /// Linear interpolation between the synth outputs around each sample point.
    Interpolate,
    /// FIR resampling, interpolating between adjacent filter phases.
    #[cfg(feature = "alloc")]
    Resample,
    /// FIR resampling using the nearest filter phase only.
    #[cfg(feature = "alloc")]
    ResampleFast,
}
45
/// Windowed-sinc FIR filter table used by the Resample* sampling methods.
///
/// Gated on `alloc`: the table is heap-allocated (`Vec`), and every use
/// site (`Sampler::fir`, `init_fir`, the resample clock paths) is already
/// behind `feature = "alloc"` — without this cfg the struct fails to
/// compile in no_std builds that lack the `alloc` feature.
#[cfg(feature = "alloc")]
#[derive(Clone)]
struct Fir {
    /// Concatenated coefficient tables: `res` phases, each `n` taps long.
    data: Vec<i16>,
    /// Number of taps per filter phase.
    n: i32,
    /// Number of filter phases (a power of two, see init_fir).
    res: i32,
}
52
/// Converts the synth's cycle-accurate output into audio samples at a
/// target sample rate, using one of several resampling strategies.
#[derive(Clone)]
pub struct Sampler {
    /// The wrapped synthesizer core being sampled.
    pub synth: Synth,
    // Clock cycles per output sample, 16.16 fixed point (set_parameters).
    cycles_per_sample: u32,
    // FIR coefficient table for the Resample* methods.
    #[cfg(feature = "alloc")]
    fir: Fir,
    sampling_method: SamplingMethod,
    // CPU SIMD capability flags, detected once in new() and consulted by
    // compute_convolution_fir on every call.
    #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
    use_sse42: bool,
    #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
    use_avx2: bool,
    // Ring buffer of raw synth output; each value is written twice, at
    // index and index + RING_SIZE, so any FIR window can be read as one
    // contiguous slice without wrapping.
    buffer: [i16; RING_SIZE * 2],
    // Current ring-buffer write position (kept within 0..RING_SIZE by masking).
    index: usize,
    // Fractional offset to the next sample point, 16.16 fixed point.
    offset: i32,
    // Previous synth output, used by the linear-interpolation path.
    prev_sample: i16,
}
72
73impl Sampler {
74 pub fn new(synth: Synth) -> Self {
75 Sampler {
76 synth,
77 cycles_per_sample: 0,
78 #[cfg(feature = "alloc")]
79 fir: Fir {
80 data: Vec::new(),
81 n: 0,
82 res: 0,
83 },
84 sampling_method: SamplingMethod::Fast,
85 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
86 use_avx2: alloc::is_x86_feature_detected!("avx2"),
87 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
88 use_sse42: alloc::is_x86_feature_detected!("sse4.2"),
89 buffer: [0; RING_SIZE * 2],
90 index: 0,
91 offset: 0,
92 prev_sample: 0,
93 }
94 }
95
96 pub fn set_parameters(&mut self, method: SamplingMethod, clock_freq: u32, sample_freq: u32) {
97 self.cycles_per_sample =
98 (clock_freq as f64 / sample_freq as f64 * (1 << FIXP_SHIFT) as f64 + 0.5) as u32;
99 self.sampling_method = method;
100
101 #[cfg(feature = "alloc")]
102 if self.sampling_method == SamplingMethod::Resample
103 || self.sampling_method == SamplingMethod::ResampleFast
104 {
105 self.init_fir(clock_freq as f64, sample_freq as f64, -1.0, 0.97);
106 }
107 for j in 0..RING_SIZE * 2 {
109 self.buffer[j] = 0;
110 }
111 self.index = 0;
112 self.offset = 0;
113 self.prev_sample = 0;
114 }
115
    /// Resets the synthesizer core and clears the sampling state
    /// (ring-buffer position, fixed-point offset, interpolation history).
    pub fn reset(&mut self) {
        self.synth.reset();
        self.index = 0;
        self.offset = 0;
        self.prev_sample = 0;
    }
122
    /// Clocks the synth for up to `delta` cycles, writing audio samples into
    /// every `interleave`-th slot of `buffer` using the configured method.
    ///
    /// Returns `(samples_written, cycles_remaining)`.
    #[inline]
    pub fn clock(&mut self, delta: u32, buffer: &mut [i16], interleave: usize) -> (usize, u32) {
        match self.sampling_method {
            SamplingMethod::Fast => self.clock_fast(delta, buffer, interleave),
            SamplingMethod::Interpolate => self.clock_interpolate(delta, buffer, interleave),
            #[cfg(feature = "alloc")]
            SamplingMethod::Resample => self.clock_resample_interpolate(delta, buffer, interleave),
            #[cfg(feature = "alloc")]
            SamplingMethod::ResampleFast => self.clock_resample_fast(delta, buffer, interleave),
        }
    }
134
135 #[inline]
137 fn clock_fast(
138 &mut self,
139 mut delta: u32,
140 buffer: &mut [i16],
141 interleave: usize,
142 ) -> (usize, u32) {
143 let mut index = 0;
144 loop {
145 let next_sample_offset = self.get_next_sample_offset();
146 let delta_sample = (next_sample_offset >> FIXP_SHIFT) as u32;
147 if delta_sample > delta || index >= buffer.len() {
148 break;
149 }
150 self.synth.clock_delta(delta_sample);
151 delta -= delta_sample;
152 buffer[(index * interleave) as usize] = self.synth.output();
153 index += 1;
154 self.update_sample_offset(next_sample_offset);
155 }
156 if delta > 0 && index < buffer.len() {
157 self.synth.clock_delta(delta);
158 self.offset -= (delta as i32) << FIXP_SHIFT;
159 (index, 0)
160 } else {
161 (index, delta)
162 }
163 }
164
    /// Sampling with linear interpolation between the synth outputs on
    /// either side of each sample point.
    ///
    /// Returns `(samples_written, cycles_remaining)`; leftover cycles are
    /// consumed (and 0 returned) when the output buffer still has room.
    #[inline]
    fn clock_interpolate(
        &mut self,
        mut delta: u32,
        buffer: &mut [i16],
        interleave: usize,
    ) -> (usize, u32) {
        let mut index = 0;
        loop {
            let next_sample_offset = self.get_next_sample_offset();
            // Whole cycles until the next sample point (16.16 fixed point).
            let delta_sample = (next_sample_offset >> FIXP_SHIFT) as u32;
            if delta_sample > delta || index >= buffer.len() {
                break;
            }
            // Clock cycle by cycle, tracking the previous output so the two
            // values bracketing the sample point are available below.
            // NOTE(review): assumes delta_sample >= 1; zero would underflow
            // the u32 range bound — confirm cycles_per_sample guarantees this.
            for _i in 0..(delta_sample - 1) {
                self.prev_sample = self.synth.output();
                self.synth.clock();
            }
            delta -= delta_sample;
            let sample_now = self.synth.output();
            // Linear interpolation weighted by the fractional offset.
            buffer[index * interleave] = self.prev_sample
                + ((self.offset * (sample_now - self.prev_sample) as i32) >> FIXP_SHIFT) as i16;
            index += 1;
            self.prev_sample = sample_now;
            self.update_sample_offset(next_sample_offset);
        }
        if delta > 0 && index < buffer.len() {
            // Consume the remaining cycles without emitting a sample.
            for _i in 0..(delta - 1) {
                self.synth.clock();
            }
            self.offset -= (delta as i32) << FIXP_SHIFT;
            (index, 0)
        } else {
            (index, delta)
        }
    }
201
    /// High-quality FIR resampling: each output sample is the convolution
    /// of the ring buffer with two adjacent filter phases, linearly
    /// interpolated by the sub-phase remainder.
    ///
    /// Returns `(samples_written, cycles_remaining)`; leftover cycles are
    /// consumed (and 0 returned) when the output buffer still has room.
    #[cfg(feature = "alloc")]
    #[inline]
    fn clock_resample_interpolate(
        &mut self,
        mut delta: u32,
        buffer: &mut [i16],
        interleave: usize,
    ) -> (usize, u32) {
        let mut index = 0;
        // Clamp limit: output must fit in i16 after the FIR_SHIFT scale-down.
        let half = 1i32 << 15;
        loop {
            let next_sample_offset = self.get_next_sample_offset2();
            // Whole cycles until the next sample point (16.16 fixed point).
            let delta_sample = (next_sample_offset >> FIXP_SHIFT) as u32;
            if delta_sample > delta || index >= buffer.len() {
                break;
            }

            // Feed the ring buffer; each value is mirrored RING_SIZE higher
            // so FIR windows can be sliced contiguously.
            for _i in 0..delta_sample {
                self.synth.clock();
                let output = self.synth.output();
                self.buffer[self.index] = output;
                self.buffer[self.index + RING_SIZE] = output;
                self.index += 1;
                self.index &= 0x3fff;
            }
            delta -= delta_sample;
            self.update_sample_offset2(next_sample_offset);

            // Split offset * res into a filter-phase index and a 16-bit
            // remainder used to interpolate between phases.
            let fir_offset_1 = (self.offset * self.fir.res) >> FIXP_SHIFT;
            let fir_offset_rmd = (self.offset * self.fir.res) & FIXP_MASK;
            let fir_start_1 = (fir_offset_1 * self.fir.n) as usize;
            let fir_end_1 = fir_start_1 + self.fir.n as usize;
            let sample_start_1 = (self.index as i32 - self.fir.n + RING_SIZE as i32) as usize;
            let sample_end_1 = sample_start_1 + self.fir.n as usize;

            // Convolution with the nearest filter phase.
            let v1 = self.compute_convolution_fir(
                &self.buffer[sample_start_1..sample_end_1],
                &self.fir.data[fir_start_1..fir_end_1],
            );

            // Convolution with the next phase, wrapping to phase 0 (and
            // shifting the sample window) past the last phase.
            let mut fir_offset_2 = fir_offset_1 + 1;
            let mut sample_start_2 = sample_start_1;
            if fir_offset_2 == self.fir.res {
                fir_offset_2 = 0;
                sample_start_2 -= 1;
            }
            let fir_start_2 = (fir_offset_2 * self.fir.n) as usize;
            let fir_end_2 = fir_start_2 + self.fir.n as usize;
            let sample_end_2 = sample_start_2 + self.fir.n as usize;

            let v2 = self.compute_convolution_fir(
                &self.buffer[sample_start_2..sample_end_2],
                &self.fir.data[fir_start_2..fir_end_2],
            );

            // Interpolate between the two phase results, then scale down
            // from coefficient fixed point.
            let mut v = v1 + ((fir_offset_rmd * (v2 - v1)) >> FIXP_SHIFT);
            v >>= FIR_SHIFT;

            // Saturate to the i16 range.
            if v >= half {
                v = half - 1;
            } else if v < -half {
                v = -half;
            }

            buffer[index * interleave] = v as i16;
            index += 1;
        }
        if delta > 0 && index < buffer.len() {
            // Consume remaining cycles, still feeding the ring buffer.
            for _i in 0..delta {
                self.synth.clock();
                let output = self.synth.output();
                self.buffer[self.index] = output;
                self.buffer[self.index + RING_SIZE] = output;
                self.index += 1;
                self.index &= 0x3fff;
            }
            self.offset -= (delta as i32) << FIXP_SHIFT;
            (index, 0)
        } else {
            (index, delta)
        }
    }
325
    /// FIR resampling using only the nearest filter phase (no interpolation
    /// between phases); faster but needs a higher-resolution table.
    ///
    /// Returns `(samples_written, cycles_remaining)`; leftover cycles are
    /// consumed (and 0 returned) when the output buffer still has room.
    #[cfg(feature = "alloc")]
    #[inline]
    fn clock_resample_fast(
        &mut self,
        mut delta: u32,
        buffer: &mut [i16],
        interleave: usize,
    ) -> (usize, u32) {
        let mut index = 0;
        // Clamp limit: output must fit in i16 after the FIR_SHIFT scale-down.
        let half = 1i32 << 15;
        loop {
            let next_sample_offset = self.get_next_sample_offset2();
            // Whole cycles until the next sample point (16.16 fixed point).
            let delta_sample = (next_sample_offset >> FIXP_SHIFT) as u32;
            if delta_sample > delta || index >= buffer.len() {
                break;
            }

            // Feed the ring buffer; each value is mirrored RING_SIZE higher
            // so FIR windows can be sliced contiguously.
            for _i in 0..delta_sample {
                self.synth.clock();
                let output = self.synth.output();
                self.buffer[self.index] = output;
                self.buffer[self.index + RING_SIZE] = output;
                self.index += 1;
                self.index &= 0x3fff;
            }
            delta -= delta_sample;
            self.update_sample_offset2(next_sample_offset);

            // Pick the nearest filter phase for the current fractional offset.
            let fir_offset = (self.offset * self.fir.res) >> FIXP_SHIFT;
            let fir_start = (fir_offset * self.fir.n) as usize;
            let fir_end = fir_start + self.fir.n as usize;
            let sample_start = (self.index as i32 - self.fir.n + RING_SIZE as i32) as usize;
            let sample_end = sample_start + self.fir.n as usize;

            let mut v = self.compute_convolution_fir(
                &self.buffer[sample_start..sample_end],
                &self.fir.data[fir_start..fir_end],
            );
            // Scale down from coefficient fixed point.
            v >>= FIR_SHIFT;

            // Saturate to the i16 range.
            if v >= half {
                v = half - 1;
            } else if v < -half {
                v = -half;
            }

            buffer[index * interleave] = v as i16;
            index += 1;
        }
        if delta > 0 && index < buffer.len() {
            // Consume remaining cycles, still feeding the ring buffer.
            for _i in 0..delta {
                self.synth.clock();
                let output = self.synth.output();
                self.buffer[self.index] = output;
                self.buffer[self.index + RING_SIZE] = output;
                self.index += 1;
                self.index &= 0x3fff;
            }
            self.offset -= (delta as i32) << FIXP_SHIFT;
            (index, 0)
        } else {
            (index, delta)
        }
    }
393
    /// Inner product of a sample window with a FIR phase, dispatching to
    /// the fastest implementation available on this CPU (AVX2, then SSE4.2,
    /// then the portable scalar fallback).
    #[inline]
    pub fn compute_convolution_fir(&self, sample: &[i16], fir: &[i16]) -> i32 {
        #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
        {
            if self.use_avx2 {
                // SAFETY-relevant: flag was set by runtime feature detection.
                return unsafe { self.compute_convolution_fir_avx2(sample, fir) };
            }
            if self.use_sse42 {
                return unsafe { self.compute_convolution_fir_sse(sample, fir) };
            }
        }
        self.compute_convolution_fir_fallback(sample, fir)
    }
407
408 #[target_feature(enable = "avx2")]
409 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
410 pub unsafe fn compute_convolution_fir_avx2(&self, sample: &[i16], fir: &[i16]) -> i32 {
411 #[cfg(target_arch = "x86")]
412 use alloc::arch::x86::*;
413 #[cfg(target_arch = "x86_64")]
414 use alloc::arch::x86_64::*;
415
416 let len = alloc::cmp::min(sample.len(), fir.len());
418 let mut fs = &fir[..len];
419 let mut ss = &sample[..len];
420 let mut v1 = _mm256_set1_epi32(0);
421 let mut v2 = _mm256_set1_epi32(0);
422 let mut v3 = _mm256_set1_epi32(0);
423 let mut v4 = _mm256_set1_epi32(0);
424 while fs.len() >= 64 {
425 let sv1 = _mm256_loadu_si256(ss.as_ptr() as *const _);
426 let sv2 = _mm256_loadu_si256((&ss[16..]).as_ptr() as *const _);
427 let sv3 = _mm256_loadu_si256((&ss[32..]).as_ptr() as *const _);
428 let sv4 = _mm256_loadu_si256((&ss[48..]).as_ptr() as *const _);
429 let fv1 = _mm256_loadu_si256(fs.as_ptr() as *const _);
430 let fv2 = _mm256_loadu_si256((&fs[16..]).as_ptr() as *const _);
431 let fv3 = _mm256_loadu_si256((&fs[32..]).as_ptr() as *const _);
432 let fv4 = _mm256_loadu_si256((&fs[48..]).as_ptr() as *const _);
433 let prod1 = _mm256_madd_epi16(sv1, fv1);
434 let prod2 = _mm256_madd_epi16(sv2, fv2);
435 let prod3 = _mm256_madd_epi16(sv3, fv3);
436 let prod4 = _mm256_madd_epi16(sv4, fv4);
437 v1 = _mm256_add_epi32(v1, prod1);
438 v2 = _mm256_add_epi32(v2, prod2);
439 v3 = _mm256_add_epi32(v3, prod3);
440 v4 = _mm256_add_epi32(v4, prod4);
441 fs = &fs[64..];
442 ss = &ss[64..];
443 }
444 v1 = _mm256_add_epi32(v1, v2);
445 v3 = _mm256_add_epi32(v3, v4);
446 v1 = _mm256_add_epi32(v1, v3);
447 let mut va = [0i32; 8];
448 _mm256_storeu_si256(va[..].as_mut_ptr() as *mut _, v1);
449 let mut v = va[0] + va[1] + va[2] + va[3] + va[4] + va[5] + va[6] + va[7];
450 for i in 0..fs.len() {
451 v += ss[i] as i32 * fs[i] as i32;
452 }
453 v
454 }
455
456 #[target_feature(enable = "sse4.2")]
457 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
458 pub unsafe fn compute_convolution_fir_sse(&self, sample: &[i16], fir: &[i16]) -> i32 {
459 #[cfg(target_arch = "x86")]
460 use alloc::arch::x86::*;
461 #[cfg(target_arch = "x86_64")]
462 use alloc::arch::x86_64::*;
463
464 let len = alloc::cmp::min(sample.len(), fir.len());
466 let mut fs = &fir[..len];
467 let mut ss = &sample[..len];
468 let mut v1 = _mm_set1_epi32(0);
469 let mut v2 = _mm_set1_epi32(0);
470 let mut v3 = _mm_set1_epi32(0);
471 let mut v4 = _mm_set1_epi32(0);
472 while fs.len() >= 32 {
473 let sv1 = _mm_loadu_si128(ss.as_ptr() as *const _);
474 let sv2 = _mm_loadu_si128((&ss[8..]).as_ptr() as *const _);
475 let sv3 = _mm_loadu_si128((&ss[16..]).as_ptr() as *const _);
476 let sv4 = _mm_loadu_si128((&ss[24..]).as_ptr() as *const _);
477 let fv1 = _mm_loadu_si128(fs.as_ptr() as *const _);
478 let fv2 = _mm_loadu_si128((&fs[8..]).as_ptr() as *const _);
479 let fv3 = _mm_loadu_si128((&fs[16..]).as_ptr() as *const _);
480 let fv4 = _mm_loadu_si128((&fs[24..]).as_ptr() as *const _);
481 let prod1 = _mm_madd_epi16(sv1, fv1);
482 let prod2 = _mm_madd_epi16(sv2, fv2);
483 let prod3 = _mm_madd_epi16(sv3, fv3);
484 let prod4 = _mm_madd_epi16(sv4, fv4);
485 v1 = _mm_add_epi32(v1, prod1);
486 v2 = _mm_add_epi32(v2, prod2);
487 v3 = _mm_add_epi32(v3, prod3);
488 v4 = _mm_add_epi32(v4, prod4);
489 fs = &fs[32..];
490 ss = &ss[32..];
491 }
492 v1 = _mm_add_epi32(v1, v2);
493 v3 = _mm_add_epi32(v3, v4);
494 v1 = _mm_add_epi32(v1, v3);
495 let mut va = [0i32; 4];
496 _mm_storeu_si128(va[..].as_mut_ptr() as *mut _, v1);
497 let mut v = va[0] + va[1] + va[2] + va[3];
498 for i in 0..fs.len() {
499 v += ss[i] as i32 * fs[i] as i32;
500 }
501 v
502 }
503
504 #[inline]
505 pub fn compute_convolution_fir_fallback(&self, sample: &[i16], fir: &[i16]) -> i32 {
506 if sample.len() < fir.len() {
507 sample
508 .iter()
509 .zip(fir.iter())
510 .fold(0, |sum, (&s, &f)| sum + (s as i32 * f as i32))
511 } else {
512 fir.iter()
513 .zip(sample.iter())
514 .fold(0, |sum, (&f, &s)| sum + (f as i32 * s as i32))
515 }
516 }
517
    /// Fixed-point offset of the next sample point, with a half-sample bias
    /// (used by the Fast/Interpolate paths; paired with update_sample_offset).
    #[inline]
    fn get_next_sample_offset(&self) -> i32 {
        self.offset + self.cycles_per_sample as i32 + (1 << (FIXP_SHIFT - 1))
    }
522
    /// Fixed-point offset of the next sample point without the half-sample
    /// bias (used by the resampling paths; paired with update_sample_offset2).
    #[inline]
    fn get_next_sample_offset2(&self) -> i32 {
        self.offset + self.cycles_per_sample as i32
    }
527
    /// Keeps the fractional part of the offset and removes the half-sample
    /// bias added by get_next_sample_offset.
    #[inline]
    fn update_sample_offset(&mut self, next_sample_offset: i32) {
        self.offset = (next_sample_offset & FIXP_MASK) - (1 << (FIXP_SHIFT - 1));
    }
532
    /// Keeps only the fractional part of the offset (unbiased variant, used
    /// by the resampling paths).
    #[inline]
    fn update_sample_offset2(&mut self, next_sample_offset: i32) {
        self.offset = next_sample_offset & FIXP_MASK;
    }
537
    /// Builds the Kaiser-windowed sinc FIR table used for resampling.
    ///
    /// * `pass_freq` — passband edge in Hz; a negative value selects a
    ///   default of 20 kHz (capped at 90% of Nyquist).
    /// * `filter_scale` — overall gain applied to the coefficients.
    ///
    /// Fills `self.fir` with `res` phase tables of `n` taps each.
    #[cfg(feature = "alloc")]
    fn init_fir(
        &mut self,
        clock_freq: f64,
        sample_freq: f64,
        mut pass_freq: f64,
        filter_scale: f64,
    ) {
        let pi = core::f64::consts::PI;
        let samples_per_cycle = sample_freq / clock_freq;
        let cycles_per_sample = clock_freq / sample_freq;

        // Default passband: 20 kHz, but never above 90% of Nyquist.
        if pass_freq < 0.0 {
            pass_freq = 20000.0;
            if 2.0 * pass_freq / sample_freq >= 0.9 {
                pass_freq = 0.9 * sample_freq / 2.0;
            }
        }

        // Stopband attenuation matching 16-bit quantization (~96 dB).
        let atten = -20.0f64 * (1.0 / (1i32 << 16) as f64).log10();
        // Transition bandwidth and cutoff, in radians (normalized to Nyquist).
        let dw = (1.0f64 - 2.0 * pass_freq / sample_freq) * pi;
        let wc = (2.0f64 * pass_freq / sample_freq + 1.0) * pi / 2.0;

        // Kaiser window beta for the chosen attenuation.
        let beta = 0.1102f64 * (atten - 8.7);
        let io_beta = self.i0(beta);

        // Filter order estimate, rounded up to even.
        let mut n_cap = ((atten - 7.95) / (2.285 * dw) + 0.5) as i32;
        n_cap += n_cap & 1;

        // Taps per phase at the clock rate; forced odd for a symmetric filter.
        self.fir.n = (n_cap as f64 * cycles_per_sample) as i32 + 1;
        self.fir.n |= 1;

        // Phase count: smallest power of two giving at least the target
        // resolution per clock cycle.
        let res = if self.sampling_method == SamplingMethod::Resample {
            FIR_RES_INTERPOLATE
        } else {
            FIR_RES_FAST
        };
        let n = ((res as f64 / cycles_per_sample).ln() / (2.0f64).ln()).ceil() as i32;
        self.fir.res = 1 << n;

        self.fir.data.clear();
        self.fir
            .data
            .resize((self.fir.n * self.fir.res) as usize, 0);

        // One windowed-sinc table per phase, each shifted by a fraction of
        // a sample (j_offset).
        for i in 0..self.fir.res {
            let fir_offset = i * self.fir.n + self.fir.n / 2;
            let j_offset = i as f64 / self.fir.res as f64;
            let fir_n_div2 = self.fir.n / 2;
            for j in -fir_n_div2..=fir_n_div2 {
                let jx = j as f64 - j_offset;
                let wt = wc * jx / cycles_per_sample;
                let temp = jx / fir_n_div2 as f64;
                // Kaiser window weight (zero outside the window).
                let kaiser = if temp.abs() <= 1.0 {
                    self.i0(beta * self.sqrt(1.0 - temp * temp)) / io_beta
                } else {
                    0f64
                };
                // sinc(wt), with the removable singularity at 0 handled.
                let sincwt = if wt.abs() >= 1e-6 { wt.sin() / wt } else { 1.0 };
                // Coefficient in 2^FIR_SHIFT fixed point, rounded.
                let val = (1i32 << FIR_SHIFT) as f64 * filter_scale * samples_per_cycle * wc / pi
                    * sincwt
                    * kaiser;
                self.fir.data[(fir_offset + j) as usize] = (val + 0.5) as i16;
            }
        }
    }
623
624 fn i0(&self, x: f64) -> f64 {
625 let i0e = 1e-6;
627 let halfx = x / 2.0;
628 let mut sum = 1.0;
629 let mut u = 1.0;
630 let mut n = 1;
631 loop {
632 let temp = halfx / n as f64;
633 n += 1;
634 u *= temp * temp;
635 sum += u;
636 if u < i0e * sum {
637 break;
638 }
639 }
640 sum
641 }
642
    /// Square root via the standard library (std builds).
    #[cfg(feature = "std")]
    fn sqrt(&self, value: f64) -> f64 {
        value.sqrt()
    }
647
    /// Square root via the crate's math shim (no_std builds, backed by libm).
    #[cfg(not(feature = "std"))]
    fn sqrt(&self, value: f64) -> f64 {
        math::sqrt(value)
    }
652}