idsp/
hbf.rs

1//! Half-band filters and cascades
2//!
3//! Used to perform very efficient high-dynamic range rate changes by powers of two.
4//!
5//! Symmetric and anti-symmetric FIR filter prototype.
6//!
7//! # Generics
8//! * `M`: number of taps, one-sided. The filter has effectively 2*M DSP taps
9//!
10//! # Half band decimation/interpolation filters
11//!
12//! Half-band filters (rate change of 2) and cascades of HBFs are implemented in
13//! [`HbfDec`] and [`HbfInt`] etc.
14//! The half-band filter has unique properties that make it preferable in many cases:
15//!
16//! * only needs M multiplications (fused multiply accumulate) for 4*M taps
17//! * HBF decimator stores less state than a generic FIR filter
18//! * as a FIR filter has linear phase/flat group delay
19//! * very small passband ripple and excellent stopband attenuation
20//! * as a cascade of decimation/interpolation filters, the higher-rate filters
21//!   need successively fewer taps, allowing the filtering to be dominated by
22//!   only the highest rate filter with the fewest taps
23//! * In a cascade of HBF the overall latency, group delay, and impulse response
24//!   length are dominated by the lowest-rate filter which, due to its manageable transition
25//!   band width (compared to single-stage filters) can be smaller, shorter, and faster.
26//! * high dynamic range and inherent stability compared with an IIR filter
27//! * can be combined with a CIC filter for non-power-of-two or even higher rate changes
28//!
29//! The implementations here are all `no_std` and `no-alloc`.
30//! They support (but don't require) in-place filtering to reduce memory usage.
31//! They unroll and optimize extremely well targetting current architectures,
32//! e.g. requiring less than 4 instructions per input item for the full `HbfDecCascade` on Skylake.
33//! The filters are optimized for decent block sizes and perform best (i.e. with negligible
34//! overhead) for blocks of 32 high-rate items or more, depending very much on architecture.
35
36use core::{
37    iter::Sum,
38    ops::{Add, Mul, Sub},
39    slice::{from_mut, from_ref},
40};
41
42use dsp_process::{ChunkIn, ChunkOut, Major, SplitProcess};
43
44/// Perform the FIR convolution and yield results iteratively.
45#[inline]
46fn get<C: Copy, T, const M: usize, const ODD: bool, const SYM: bool>(
47    c: &[C; M],
48    x: &[T],
49) -> impl Iterator<Item = T>
50where
51    T: Copy + Add<Output = T> + Sub<Output = T> + Mul<C, Output = T> + Sum,
52{
53    // https://doc.rust-lang.org/std/primitive.slice.html#method.array_windows
54    x.windows(2 * M + ODD as usize).map(|x| {
55        let Some((old, new)) = x.first_chunk::<M>().zip(x.last_chunk::<M>()) else {
56            unreachable!()
57        };
58        // Taps from small (large distance from center) to large (center taps)
59        // to reduce FP cancellation a bit
60        let xc = old
61            .iter()
62            .zip(new.iter().rev())
63            .zip(c.iter())
64            .map(|((old, new), tap)| (if SYM { *new + *old } else { *new - *old }) * *tap)
65            .sum();
66        if ODD { xc + x[M] } else { xc }
67    })
68}
69
70macro_rules! type_fir {
71    ($name:ident, $odd:literal, $sym:literal) => {
72        #[doc = concat!("Linear phase FIR taps for odd = ", stringify!($odd), " and symmetric = ", stringify!($sym))]
73        #[derive(Clone, Copy, Debug, Default)]
74        #[repr(transparent)]
75        pub struct $name<C>(pub C);
76        impl<T, const M: usize> $name<[T; M]> {
77            /// Response length/number of taps minus one
78            pub const LEN: usize = 2 * M - 1 + $odd as usize;
79        }
80
81        impl<
82            C: Copy,
83            T: Copy + Default + Sub<T, Output = T> + Add<Output = T> + Mul<C, Output = T> + Sum,
84            const M: usize,
85            const N: usize,
86        > SplitProcess<T, T, [T; N]> for $name<[C; M]>
87        {
88            fn block(&self, state: &mut [T; N], x: &[T], y: &mut [T]) {
89                for (x, y) in x.chunks(N - Self::LEN).zip(y.chunks_mut(N - Self::LEN)) {
90                    state[Self::LEN..][..x.len()].copy_from_slice(x);
91                    for (y, x) in y.iter_mut().zip(get::<_, _, _, $odd, $sym>(&self.0, state)) {
92                        *y = x;
93                    }
94                    state.copy_within(x.len()..x.len() + Self::LEN, 0);
95                }
96            }
97
98            fn process(&self, state: &mut [T; N], x: T) -> T {
99                let mut y = T::default();
100                self.block(state, from_ref(&x), from_mut(&mut y));
101                y
102            }
103        }
104    };
105}
106// Type 1 taps
107// Center tap is unity
108type_fir!(OddSymmetric, true, true);
109// Type 2 taps
110type_fir!(EvenSymmetric, false, true);
111// Type 3 taps
112// Center tap is zero
113type_fir!(OddAntiSymmetric, true, false);
114// Type 4 taps
115type_fir!(EvenAntiSymmetric, false, false);
116
117/// Half band decimator (decimate by two) state
118#[derive(Clone, Debug)]
119pub struct HbfDec<T> {
120    even: T, // at least N - M len
121    odd: T,  // N > 2*M - 1 (=EvenSymmetric::LEN)
122}
123
124impl<T: Copy + Default, const N: usize> Default for HbfDec<[T; N]> {
125    fn default() -> Self {
126        Self {
127            even: [T::default(); _],
128            odd: [T::default(); _],
129        }
130    }
131}
132
133impl<
134    C: Copy,
135    T: Copy + Default + Add<Output = T> + Sub<Output = T> + Mul<C, Output = T> + Sum,
136    const M: usize,
137    const N: usize,
138> SplitProcess<[T; 2], T, HbfDec<[T; N]>> for EvenSymmetric<[C; M]>
139{
140    fn block(&self, state: &mut HbfDec<[T; N]>, x: &[[T; 2]], y: &mut [T]) {
141        debug_assert_eq!(x.len(), y.len());
142        const { assert!(N > Self::LEN) }
143        for (x, y) in x.chunks(N - Self::LEN).zip(y.chunks_mut(N - Self::LEN)) {
144            // load input
145            for (x, (even, odd)) in x.iter().zip(
146                state.even[M - 1..]
147                    .iter_mut()
148                    .zip(state.odd[Self::LEN..].iter_mut()),
149            ) {
150                *even = x[0];
151                *odd = x[1];
152            }
153            // compute output
154            let odd = get::<_, _, _, false, true>(&self.0, &state.odd);
155            for (y, (odd, even)) in y.iter_mut().zip(odd.zip(state.even.iter().copied())) {
156                *y = odd + even;
157            }
158            // keep state
159            state.even.copy_within(x.len()..x.len() + M - 1, 0);
160            state.odd.copy_within(x.len()..x.len() + Self::LEN, 0);
161        }
162    }
163
164    fn process(&self, state: &mut HbfDec<[T; N]>, x: [T; 2]) -> T {
165        let mut y = Default::default();
166        self.block(state, from_ref(&x), from_mut(&mut y));
167        y
168    }
169}
170
171/// Half band interpolator (interpolation rate 2) state
172#[derive(Clone, Debug)]
173pub struct HbfInt<T> {
174    x: T, // len N > EvenSymmetric::LEN
175}
176
177impl<T: Default + Copy, const N: usize> Default for HbfInt<[T; N]> {
178    fn default() -> Self {
179        Self {
180            x: [T::default(); _],
181        }
182    }
183}
184
185impl<
186    C: Copy,
187    T: Copy + Default + Add<Output = T> + Sub<Output = T> + Mul<C, Output = T> + Sum,
188    const M: usize,
189    const N: usize,
190> SplitProcess<T, [T; 2], HbfInt<[T; N]>> for EvenSymmetric<[C; M]>
191{
192    fn block(&self, state: &mut HbfInt<[T; N]>, x: &[T], y: &mut [[T; 2]]) {
193        debug_assert_eq!(x.len(), y.len());
194        const { assert!(N > Self::LEN) }
195        for (x, y) in x.chunks(N - Self::LEN).zip(y.chunks_mut(N - Self::LEN)) {
196            // load input
197            state.x[Self::LEN..][..x.len()].copy_from_slice(x);
198            // compute output
199            let odd = get::<_, _, _, false, true>(&self.0, &state.x);
200            for (y, (even, odd)) in y.iter_mut().zip(odd.zip(state.x[M..].iter().copied())) {
201                *y = [even, odd]; // interpolated, center tap: identity
202            }
203            // keep state
204            state.x.copy_within(x.len()..x.len() + Self::LEN, 0);
205        }
206    }
207
208    fn process(&self, state: &mut HbfInt<[T; N]>, x: T) -> [T; 2] {
209        let mut y = Default::default();
210        self.block(state, from_ref(&x), from_mut(&mut y));
211        y
212    }
213}
214
215/// Half band filter cascade taps for a 98 dB filter
216type HbfTaps98 = (
217    EvenSymmetric<[f32; 15]>,
218    EvenSymmetric<[f32; 6]>,
219    EvenSymmetric<[f32; 3]>,
220    EvenSymmetric<[f32; 3]>,
221    EvenSymmetric<[f32; 2]>,
222);
223
224/// Half band filter cascade taps
225///
226/// * obtained with `signal.remez(2*n, bands=(0, .4, .5, .5), desired=(1, 0), fs=1, grid_density=1<<10)[:n]`
227/// * more than 98 dB stop band attenuation (>16 bit)
228/// * 0.4 pass band (relative to lowest sample rate)
229/// * less than 0.001 dB ripple
230/// * linear phase/flat group delay
231/// * rate change up to 2**5 = 32
232/// * lowest rate filter is at 0 index
233/// * use taps 0..n for 2**n interpolation/decimation
234#[allow(clippy::excessive_precision)]
235pub const HBF_TAPS_98: HbfTaps98 = (
236    // n=15 coefficients (effective number of DSP taps 4*15-1 = 59), transition band width df=.2 fs
237    EvenSymmetric([
238        7.02144012e-05,
239        -2.43279582e-04,
240        6.35026936e-04,
241        -1.39782541e-03,
242        2.74613582e-03,
243        -4.96403839e-03,
244        8.41806912e-03,
245        -1.35827601e-02,
246        2.11004053e-02,
247        -3.19267647e-02,
248        4.77024289e-02,
249        -7.18014345e-02,
250        1.12942004e-01,
251        -2.03279594e-01,
252        6.33592923e-01,
253    ]),
254    // 6, .47
255    EvenSymmetric([
256        -0.00086943,
257        0.00577837,
258        -0.02201674,
259        0.06357869,
260        -0.16627679,
261        0.61979312,
262    ]),
263    // 3, .754
264    EvenSymmetric([0.01414651, -0.10439639, 0.59026742]),
265    // 3, .877
266    EvenSymmetric([0.01227974, -0.09930782, 0.58702834]),
267    // 2, .94
268    EvenSymmetric([-0.06291796, 0.5629161]),
269);
270
271/// Half band filter cascade taps
272type HbfTaps = (
273    EvenSymmetric<[f32; 23]>,
274    EvenSymmetric<[f32; 10]>,
275    EvenSymmetric<[f32; 5]>,
276    EvenSymmetric<[f32; 4]>,
277    EvenSymmetric<[f32; 3]>,
278);
279
280/// Half band filters taps
281///
282/// * 140 dB stopband, 0.2 µB passband ripple, limited by f32 dynamic range
283/// * otherwise like [`HBF_TAPS_98`].
284#[allow(clippy::excessive_precision)]
285pub const HBF_TAPS: HbfTaps = (
286    EvenSymmetric([
287        7.60375795e-07,
288        -3.77494111e-06,
289        1.26458559e-05,
290        -3.43188253e-05,
291        8.10687478e-05,
292        -1.72971467e-04,
293        3.40845059e-04,
294        -6.29522864e-04,
295        1.10128831e-03,
296        -1.83933299e-03,
297        2.95124926e-03,
298        -4.57290964e-03,
299        6.87374176e-03,
300        -1.00656257e-02,
301        1.44199840e-02,
302        -2.03025100e-02,
303        2.82462332e-02,
304        -3.91128509e-02,
305        5.44795658e-02,
306        -7.77002672e-02,
307        1.17523452e-01,
308        -2.06185388e-01,
309        6.34588695e-01,
310    ]),
311    EvenSymmetric([
312        -1.12811343e-05,
313        1.12724671e-04,
314        -6.07439343e-04,
315        2.31904511e-03,
316        -7.00322950e-03,
317        1.78225473e-02,
318        -4.01209836e-02,
319        8.43315989e-02,
320        -1.83189521e-01,
321        6.26346521e-01,
322    ]),
323    EvenSymmetric([0.0007686, -0.00768669, 0.0386536, -0.14002434, 0.60828885]),
324    EvenSymmetric([-0.00261331, 0.02476858, -0.12112638, 0.59897111]),
325    EvenSymmetric([0.01186105, -0.09808109, 0.58622005]),
326);
327
328/// Passband width in units of lowest sample rate
329pub const HBF_PASSBAND: f32 = 0.4;
330
331/// Cascade block size
332///
333/// Heuristically performs well.
334pub const HBF_CASCADE_BLOCK: usize = 1 << 5;
335
336/// Half-band decimation filter state
337///
338/// See [HBF_TAPS] and [HBF_DEC_CASCADE].
339/// Supports rate changes are power of two up to 32.
340pub type HbfDec2 =
341    HbfDec<[f32; EvenSymmetric::<[f32; HBF_TAPS.0.0.len()]>::LEN + HBF_CASCADE_BLOCK]>;
342/// HBF Decimate-by-4 cascade state
343pub type HbfDec4 = (
344    HbfDec<[f32; EvenSymmetric::<[f32; HBF_TAPS.1.0.len()]>::LEN + (HBF_CASCADE_BLOCK << 1)]>,
345    HbfDec2,
346);
347/// HBF Decimate-by-8 cascade state
348pub type HbfDec8 = (
349    HbfDec<[f32; EvenSymmetric::<[f32; HBF_TAPS.2.0.len()]>::LEN + (HBF_CASCADE_BLOCK << 2)]>,
350    HbfDec4,
351);
352/// HBF Decimate-by-16 cascade state
353pub type HbfDec16 = (
354    HbfDec<[f32; EvenSymmetric::<[f32; HBF_TAPS.3.0.len()]>::LEN + (HBF_CASCADE_BLOCK << 3)]>,
355    HbfDec8,
356);
357/// HBF Decimate-by-32 cascade state
358pub type HbfDec32 = (
359    HbfDec<[f32; EvenSymmetric::<[f32; HBF_TAPS.4.0.len()]>::LEN + (HBF_CASCADE_BLOCK << 4)]>,
360    HbfDec16,
361);
362
363type HbfDecCascade<const B: usize = HBF_CASCADE_BLOCK> = Major<
364    (
365        ChunkIn<&'static EvenSymmetric<[f32; HBF_TAPS.4.0.len()]>, 2>,
366        Major<
367            (
368                ChunkIn<&'static EvenSymmetric<[f32; HBF_TAPS.3.0.len()]>, 2>,
369                Major<
370                    (
371                        ChunkIn<&'static EvenSymmetric<[f32; HBF_TAPS.2.0.len()]>, 2>,
372                        Major<
373                            (
374                                ChunkIn<&'static EvenSymmetric<[f32; HBF_TAPS.1.0.len()]>, 2>,
375                                &'static EvenSymmetric<[f32; HBF_TAPS.0.0.len()]>,
376                            ),
377                            [[f32; 2]; B],
378                        >,
379                    ),
380                    [[f32; 4]; B],
381                >,
382            ),
383            [[f32; 8]; B],
384        >,
385    ),
386    [[f32; 16]; B],
387>;
388
389/// HBF decimation cascade
390pub const HBF_DEC_CASCADE: HbfDecCascade = Major::new((
391    ChunkIn(&HBF_TAPS.4),
392    Major::new((
393        ChunkIn(&HBF_TAPS.3),
394        Major::new((
395            ChunkIn(&HBF_TAPS.2),
396            Major::new((ChunkIn(&HBF_TAPS.1), &HBF_TAPS.0)),
397        )),
398    )),
399));
400
401/// Response length, effective number of taps
402pub const fn hbf_dec_response_length(depth: usize) -> usize {
403    assert!(depth < 5);
404    let mut n = 0;
405    if depth > 0 {
406        n /= 2;
407        n += EvenSymmetric::<[f32; HBF_TAPS.3.0.len()]>::LEN;
408    }
409    if depth > 1 {
410        n /= 2;
411        n += EvenSymmetric::<[f32; HBF_TAPS.2.0.len()]>::LEN;
412    }
413    if depth > 2 {
414        n /= 2;
415        n += EvenSymmetric::<[f32; HBF_TAPS.1.0.len()]>::LEN;
416    }
417    if depth > 3 {
418        n /= 2;
419        n += EvenSymmetric::<[f32; HBF_TAPS.0.0.len()]>::LEN;
420    }
421    n
422}
423
424/// Half-band interpolation filter state
425///
426/// See [HBF_TAPS] and [HBF_INT_CASCADE].
427/// Supports rate changes are power of two up to 32.
428pub type HbfInt2 =
429    HbfInt<[f32; EvenSymmetric::<[f32; HBF_TAPS.0.0.len()]>::LEN + HBF_CASCADE_BLOCK]>;
430/// HBF interpolate-by-4 cascade state
431pub type HbfInt4 = (
432    HbfInt2,
433    HbfInt<[f32; EvenSymmetric::<[f32; HBF_TAPS.1.0.len()]>::LEN + (HBF_CASCADE_BLOCK << 1)]>,
434);
435/// HBF interpolate-by-8 cascade state
436pub type HbfInt8 = (
437    HbfInt4,
438    HbfInt<[f32; EvenSymmetric::<[f32; HBF_TAPS.2.0.len()]>::LEN + (HBF_CASCADE_BLOCK << 2)]>,
439);
440/// HBF interpolate-by-16 cascade state
441pub type HbfInt16 = (
442    HbfInt8,
443    HbfInt<[f32; EvenSymmetric::<[f32; HBF_TAPS.3.0.len()]>::LEN + (HBF_CASCADE_BLOCK << 3)]>,
444);
445/// HBF interpolate-by-32 cascade state
446pub type HbfInt32 = (
447    HbfInt16,
448    HbfInt<[f32; EvenSymmetric::<[f32; HBF_TAPS.4.0.len()]>::LEN + (HBF_CASCADE_BLOCK << 4)]>,
449);
450
451type HbfIntCascade<const B: usize = HBF_CASCADE_BLOCK> = Major<
452    (
453        Major<
454            (
455                Major<
456                    (
457                        Major<
458                            (
459                                &'static EvenSymmetric<[f32; HBF_TAPS.0.0.len()]>,
460                                ChunkOut<&'static EvenSymmetric<[f32; HBF_TAPS.1.0.len()]>, 2>,
461                            ),
462                            [[f32; 2]; B],
463                        >,
464                        ChunkOut<&'static EvenSymmetric<[f32; HBF_TAPS.2.0.len()]>, 2>,
465                    ),
466                    [[f32; 4]; B],
467                >,
468                ChunkOut<&'static EvenSymmetric<[f32; HBF_TAPS.3.0.len()]>, 2>,
469            ),
470            [[f32; 8]; B],
471        >,
472        ChunkOut<&'static EvenSymmetric<[f32; HBF_TAPS.4.0.len()]>, 2>,
473    ),
474    [[f32; 16]; B],
475>;
476
477/// HBF interpolation cascade
478pub const HBF_INT_CASCADE: HbfIntCascade = Major::new((
479    Major::new((
480        Major::new((
481            Major::new((&HBF_TAPS.0, ChunkOut(&HBF_TAPS.1))),
482            ChunkOut(&HBF_TAPS.2),
483        )),
484        ChunkOut(&HBF_TAPS.3),
485    )),
486    ChunkOut(&HBF_TAPS.4),
487));
488
489/// Response length, effective number of taps
490pub const fn hbf_int_response_length(depth: usize) -> usize {
491    assert!(depth < 5);
492    let mut n = 0;
493    if depth > 0 {
494        n += EvenSymmetric::<[f32; HBF_TAPS.0.0.len()]>::LEN;
495        n *= 2;
496    }
497    if depth > 1 {
498        n += EvenSymmetric::<[f32; HBF_TAPS.1.0.len()]>::LEN;
499        n *= 2;
500    }
501    if depth > 2 {
502        n += EvenSymmetric::<[f32; HBF_TAPS.2.0.len()]>::LEN;
503        n *= 2;
504    }
505    if depth > 3 {
506        n += EvenSymmetric::<[f32; HBF_TAPS.3.0.len()]>::LEN;
507        n *= 2;
508    }
509    n
510}
511
512#[cfg(test)]
513mod test {
514    use super::*;
515    use dsp_process::{Process, Split};
516    use rustfft::{FftPlanner, num_complex::Complex};
517
518    #[test]
519    fn test() {
520        let mut h = Split::new(EvenSymmetric([0.5]), HbfDec::<[_; 5]>::default());
521        h.as_mut().block(&[], &mut []);
522
523        let x = [1.0; 8];
524        let mut y = [0.0; 4];
525        h.as_mut().block(x.as_chunks().0, &mut y);
526        assert_eq!(y, [1.5, 2.0, 2.0, 2.0]);
527
528        let mut h = Split::new(&HBF_TAPS.3, HbfDec::<[_; 11]>::default());
529        let x: Vec<_> = (0..8).map(|i| i as f32).collect();
530        h.as_mut().block(x.as_chunks().0, &mut y);
531        println!("{:?}", y);
532    }
533
534    #[test]
535    fn decim() {
536        let mut h = HbfDec16::default();
537        const R: usize = 1 << 4;
538        let mut y = vec![0.0; 2];
539        let x: Vec<_> = (0..y.len() * R).map(|i| i as f32).collect();
540        HBF_DEC_CASCADE
541            .inner
542            .1
543            .block(&mut h, x.as_chunks::<R>().0, &mut y);
544        println!("{:?}", y);
545    }
546
547    #[test]
548    fn response_length_dec() {
549        let mut h = HbfDec16::default();
550        const R: usize = 4;
551        let mut y = [0.0; 100];
552        let x: Vec<f32> = (0..y.len() << R).map(|_| rand::random()).collect();
553        HBF_DEC_CASCADE
554            .inner
555            .1
556            .block(&mut h, x.as_chunks::<{ 1 << R }>().0, &mut y);
557        let x = vec![0.0; 1 << 10];
558        HBF_DEC_CASCADE.inner.1.block(
559            &mut h,
560            x.as_chunks::<{ 1 << R }>().0,
561            &mut y[..x.len() >> R],
562        );
563        let n = hbf_dec_response_length(R);
564        assert!(y[n - 1] != 0.0);
565        assert_eq!(y[n], 0.0);
566    }
567
568    #[test]
569    fn interp() {
570        let mut h = HbfInt16::default();
571        const R: usize = 4;
572        let r = hbf_int_response_length(R);
573        let mut x = vec![0.0; (r >> R) + 1];
574        x[0] = 1.0;
575        let mut y = vec![[0.0; 1 << R]; x.len()];
576        HBF_INT_CASCADE.inner.0.block(&mut h, &x, &mut y);
577        println!("{:?}", y); // interpolator impulse response
578        let y = y.as_flattened();
579        assert_ne!(y[r], 0.0);
580        assert_eq!(y[r + 1..], vec![0.0; y.len() - r - 1]);
581
582        let mut y: Vec<_> = y
583            .iter()
584            .map(|&x| Complex {
585                re: x as f64 / (1 << R) as f64,
586                im: 0.0,
587            })
588            .collect();
589        // pad
590        y.resize(5 << 10, Default::default());
591        FftPlanner::new().plan_fft_forward(y.len()).process(&mut y);
592        // transfer function
593        let p: Vec<_> = y.iter().map(|y| 10.0 * y.norm_sqr().log10()).collect();
594        let f = p.len() as f32 / (1 << R) as f32;
595        // pass band ripple
596        let p_pass = p[..(f * HBF_PASSBAND).floor() as _]
597            .iter()
598            .fold(0.0, |m, p| p.abs().max(m));
599        assert!(p_pass < 1e-6, "{p_pass}");
600        // stop band attenuation
601        let p_stop = p[(f * (1.0 - HBF_PASSBAND)).ceil() as _..p.len() / 2]
602            .iter()
603            .fold(-200.0, |m, p| p.max(m));
604        assert!(p_stop < -141.5, "{p_stop}");
605    }
606
607    /// small 32 block size, single stage, 3 mul (11 tap) decimator
608    /// 2.5 cycles per input item, > 2 GS/s per core on Skylake
609    #[test]
610    #[ignore]
611    fn insn_dec() {
612        const N: usize = HBF_TAPS.4.0.len();
613        assert_eq!(N, 3);
614        const M: usize = 1 << 4;
615        let mut h = HbfDec::<[_; EvenSymmetric::<[f32; N]>::LEN + M]>::default();
616        let mut x = [[9.0; 2]; M];
617        let mut y = [0.0; M];
618        for _ in 0..1 << 25 {
619            HBF_TAPS.4.block(&mut h, &x, &mut y);
620            x[13][1] = y[11]; // prevent the entire loop from being optimized away
621        }
622    }
623
624    /// 1k block size, single stage, 23 mul (91 tap) decimator
625    /// 2.6 cycles: > 1 GS/s
626    #[test]
627    #[ignore]
628    fn insn_dec2() {
629        const N: usize = HBF_TAPS.0.0.len();
630        assert_eq!(N, 23);
631        const M: usize = 1 << 9;
632        let mut h = HbfDec::<[_; EvenSymmetric::<[f32; N]>::LEN + M]>::default();
633        let mut x = [[9.0; 2]; M];
634        let mut y = [0.0; M];
635        for _ in 0..1 << 20 {
636            HBF_TAPS.0.block(&mut h, &x, &mut y);
637            x[33][1] = y[11]; // prevent the entire loop from being optimized away
638        }
639    }
640
641    /// full block size full decimator cascade (depth 4, 1024 items per input block)
642    /// 1.8 cycles: > 2 GS/s
643    #[test]
644    #[ignore]
645    fn insn_casc() {
646        let mut h = HbfDec16::default();
647        const R: usize = 4;
648        let mut x = [[9.0; 1 << R]; 1 << 6];
649        let mut y = [0.0; 1 << 6];
650        for _ in 0..1 << 20 {
651            HBF_DEC_CASCADE.inner.1.block(&mut h, &x, &mut y);
652            x[33][1] = y[11]; // prevent the entire loop from being optimized away
653        }
654    }
655
656    // // sdr crate, setup like insn_dec2()
657    // // 187 insn
658    // #[test]
659    // #[ignore]
660    // fn insn_sdr() {
661    //     use sdr::fir;
662    //     const N: usize = HBF_TAPS.0.len();
663    //     const M: usize = 1 << 10;
664    //     let mut taps = [0.0f64; { 4 * N - 1 }];
665    //     let (old, new) = taps.split_at_mut(2 * N - 1);
666    //     for (tap, (old, new)) in HBF_TAPS.0.iter().zip(
667    //         old.iter_mut()
668    //             .step_by(2)
669    //             .zip(new.iter_mut().rev().step_by(2)),
670    //     ) {
671    //         *old = (*tap * 0.5).into();
672    //         *new = *old;
673    //     }
674    //     taps[2 * N - 1] = 0.5;
675    //     let mut h = fir::FIR::new(&taps, 2, 1);
676    //     let x = [9.0; M];
677    //     // let mut h1 = HbfDec::<N, { 2 * N - 1 + M }>::new(&HBF_TAPS.0);
678    //     // let mut y1 = [0.0; M / 2];
679    //     for _ in 0..1 << 16 {
680    //         let _y = h.process(&x);
681    //         // h1.process_block(Some(&x), &mut y1);
682    //         // assert_eq!(y1.len(), y.len());
683    //         // assert!(y1.iter().zip(y.iter()).all(|(y1, y)| (y1 - y).abs() < 1e-6));
684    //     }
685    // }
686
687    // // // futuredsp crate, setup like insn_dec2()
688    // // // 315 insn
689    // #[test]
690    // #[ignore]
691    // fn insn_futuredsp() {
692    //     use futuredsp::{fir::PolyphaseResamplingFirKernel, UnaryKernel};
693    //     const N: usize = HBF_TAPS.0.len();
694    //     const M: usize = 1 << 10;
695    //     let mut taps = [0.0f32; { 4 * N - 1 }];
696    //     let (old, new) = taps.split_at_mut(2 * N - 1);
697    //     for (tap, (old, new)) in HBF_TAPS.0.iter().zip(
698    //         old.iter_mut()
699    //             .step_by(2)
700    //             .zip(new.iter_mut().rev().step_by(2)),
701    //     ) {
702    //         *old = *tap * 0.5;
703    //         *new = *old;
704    //     }
705    //     taps[2 * N - 1] = 0.5;
706    //     let x = [9.0f32; M];
707    //     let mut y = [0.0f32; M];
708    //     let fir = PolyphaseResamplingFirKernel::<_, _, _, _>::new(1, 2, taps);
709    //     for _ in 0..1 << 14 {
710    //         fir.work(&x, &mut y);
711    //     }
712    // }
713}