Skip to main content

dsp_process/
compose.rs

1use crate::{
2    LaneMajor, SplitInplace, SplitProcess, SplitViewInplace, SplitViewProcess, View, ViewMut,
3};
4use core::marker::PhantomData;
5
//////////// SPLIT COMPOSE ////////////
7
8/// Chain two different processors with an explicit intermediate type.
9///
10/// This is the heterogeneous serial-composition primitive for tuples. The first
11/// stage may change the sample type, while the second stage must accept that
12/// intermediate value in place.
13impl<X: Copy, Y: Copy, C0, C1, S0, S1> SplitProcess<X, Y, (S0, S1)> for (C0, C1)
14where
15    C0: SplitProcess<X, Y, S0>,
16    C1: SplitInplace<Y, S1>,
17{
18    fn process(&self, state: &mut (S0, S1), x: X) -> Y {
19        self.1
20            .process(&mut state.1, self.0.process(&mut state.0, x))
21    }
22
23    fn block(&self, state: &mut (S0, S1), x: &[X], y: &mut [Y]) {
24        self.0.block(&mut state.0, x, y);
25        self.1.inplace(&mut state.1, y);
26    }
27}
28
29impl<X: Copy, C0, C1, S0, S1> SplitInplace<X, (S0, S1)> for (C0, C1)
30where
31    C0: SplitInplace<X, S0>,
32    C1: SplitInplace<X, S1>,
33{
34    fn inplace(&self, state: &mut (S0, S1), xy: &mut [X]) {
35        self.0.inplace(&mut state.0, xy);
36        self.1.inplace(&mut state.1, xy);
37    }
38}
39
40/// Chain multiple homogeneous processors over one sample type.
41///
42/// The slice may be empty, in which case `block()` acts as identity.
43impl<X: Copy, C, S> SplitProcess<X, X, [S]> for [C]
44where
45    C: SplitInplace<X, S>,
46{
47    fn process(&self, state: &mut [S], x: X) -> X {
48        debug_assert_eq!(self.len(), state.len());
49        self.iter()
50            .zip(state.iter_mut())
51            .fold(x, |x, (c, s)| c.process(s, x))
52    }
53
54    fn block(&self, state: &mut [S], x: &[X], y: &mut [X]) {
55        debug_assert_eq!(self.len(), state.len());
56        if let Some(((c0, c), (s0, s))) = self.split_first().zip(state.split_first_mut()) {
57            c0.block(s0, x, y);
58            for (c, s) in c.iter().zip(s) {
59                c.inplace(s, y);
60            }
61        } else {
62            y.copy_from_slice(x);
63        }
64    }
65}
66
67impl<X: Copy, C, S> SplitInplace<X, [S]> for [C]
68where
69    C: SplitInplace<X, S>,
70{
71    fn inplace(&self, state: &mut [S], xy: &mut [X]) {
72        debug_assert_eq!(self.len(), state.len());
73        for (c, s) in self.iter().zip(state.iter_mut()) {
74            c.inplace(s, xy);
75        }
76    }
77}
78
79/// Chain a non-empty homogeneous array of processors with one initial type change.
80impl<X: Copy, Y: Copy, C, S, const N: usize> SplitProcess<X, Y, [S; N]> for [C; N]
81where
82    C: SplitProcess<X, Y, S> + SplitInplace<Y, S>,
83{
84    fn process(&self, state: &mut [S; N], x: X) -> Y {
85        const { assert!(N > 0) }
86        let Some(((c0, c), (s0, s))) = self.split_first().zip(state.split_first_mut()) else {
87            unreachable!()
88        };
89        c.iter()
90            .zip(s.iter_mut())
91            .fold(c0.process(s0, x), |x, (c, s)| c.process(s, x))
92    }
93
94    fn block(&self, state: &mut [S; N], x: &[X], y: &mut [Y]) {
95        const { assert!(N > 0) }
96        let Some(((c0, c), (s0, s))) = self.split_first().zip(state.split_first_mut()) else {
97            unreachable!()
98        };
99        c0.block(s0, x, y);
100        for (c, s) in c.iter().zip(s.iter_mut()) {
101            c.inplace(s, y)
102        }
103    }
104}
105
106impl<X: Copy, C, S, const N: usize> SplitInplace<X, [S; N]> for [C; N]
107where
108    C: SplitInplace<X, S>,
109{
110    fn inplace(&self, state: &mut [S; N], xy: &mut [X]) {
111        self.as_ref().inplace(state.as_mut(), xy)
112    }
113}
114
//////////// SPLIT MINOR ////////////
116
/// Serial composition that iterates data in the outer loop and stages in the
/// inner loop (processor-minor, data-major).
///
/// Wrapping a composition in `Minor` affects only the loop nest used by
/// `block()` and `inplace()`; scalar `process()` and the resulting signal are
/// the same as for the unwrapped composition.
///
/// Plain tuple and array composition is stage-major: a stage is run across the
/// entire slice before the next stage starts. `Minor` inverts that: each
/// sample is carried through all wrapped stages before the next sample is
/// touched.
///
/// Good fits:
/// - small, fine-grained stages
/// - per-stage state/configuration small enough to stay hot while stepping
///   one sample at a time
/// - stages whose own `block()`/`inplace()` specializations are not worth
///   preserving
/// - tuple composition across an intermediate type for which the downstream
///   stage is not `SplitInplace`
///
/// Poor fits:
/// - a stage with a meaningful `block()` specialization that wants a long
///   contiguous slice
/// - SIMD or autovectorization that relies on stage-major contiguous data
/// - workloads whose cache behavior is dominated by streaming through data
///   rather than by keeping tiny stage state hot
/// - cases where an explicit scratch buffer via [`Major`] fits better
///
/// Summary: `Minor` gives up stage-wise streaming locality in exchange for
/// per-sample stage locality. That often helps tiny recursive stages and can
/// hurt stages whose slice path exists for cache or SIMD reasons.
#[derive(Debug, Default, Clone, Copy)]
#[repr(transparent)]
pub struct Minor<C: ?Sized, U> {
    /// Marker for the intermediate data type; carries no data.
    _intermediate: PhantomData<U>,
    /// The wrapped stage configurations (kept as the last field so `C` may be unsized).
    inner: C,
}
155
156impl<C, U> Minor<C, U> {
157    /// Create a [`Minor`] wrapper around an existing composition.
158    #[must_use]
159    pub const fn new(inner: C) -> Self {
160        Self {
161            inner,
162            _intermediate: PhantomData,
163        }
164    }
165
166    /// Consume the wrapper and return the inner composition.
167    #[must_use]
168    pub fn into_inner(self) -> C {
169        self.inner
170    }
171
172    /// Borrow the wrapped composition.
173    #[must_use]
174    pub fn inner(&self) -> &C {
175        &self.inner
176    }
177}
178
179impl<X: Copy, U: Copy, Y, C0, C1, S0, S1> SplitProcess<X, Y, (S0, S1)> for Minor<(C0, C1), U>
180where
181    C0: SplitProcess<X, U, S0>,
182    C1: SplitProcess<U, Y, S1>,
183{
184    fn process(&self, state: &mut (S0, S1), x: X) -> Y {
185        self.inner
186            .1
187            .process(&mut state.1, self.inner.0.process(&mut state.0, x))
188    }
189}
190
191/// A chain of multiple small filters of the same type
192impl<X: Copy, C, S> SplitProcess<X, X, [S]> for Minor<[C], X>
193where
194    C: SplitProcess<X, X, S>,
195{
196    fn process(&self, state: &mut [S], x: X) -> X {
197        debug_assert_eq!(self.inner.len(), state.len());
198        self.inner
199            .iter()
200            .zip(state.iter_mut())
201            .fold(x, |x, (c, s)| c.process(s, x))
202    }
203}
204
205/// A chain of multiple small filters of the same type
206impl<X: Copy, Y: Copy, C, S, const N: usize> SplitProcess<X, Y, [S; N]> for Minor<[C; N], Y>
207where
208    C: SplitProcess<X, Y, S> + SplitProcess<Y, Y, S>,
209{
210    fn process(&self, state: &mut [S; N], x: X) -> Y {
211        const { assert!(N > 0) }
212        let Some(((c0, c), (s0, s))) = self.inner.split_first().zip(state.split_first_mut()) else {
213            unreachable!()
214        };
215        c.iter()
216            .zip(s.iter_mut())
217            .fold(c0.process(s0, x), |x, (c, s)| c.process(s, x))
218    }
219}
220
221impl<X, U, C, S> SplitInplace<X, S> for Minor<C, U>
222where
223    X: Copy,
224    Self: SplitProcess<X, X, S>,
225{
226}
227
//////////// SPLIT PARALLEL ////////////
229
/// Branch-wise (parallel) composition over tuple- or array-shaped samples.
///
/// `Parallel` complements serial tuple/array composition: branch `i` is fed
/// lane `i` of the input and yields lane `i` of the output. Memory is not
/// reordered and lanes never interact.
///
/// It suits signals that are already structurally parallel, e.g. I/Q pairs,
/// stereo lane groups, or fixed branch banks. Follow it with [`crate::Add`],
/// [`crate::Sub`], or [`crate::Mul`] when the branch outputs need reducing.
#[derive(Debug, Default, Clone, Copy)]
pub struct Parallel<P>(P);
241
242impl<P> Parallel<P> {
243    /// Create a [`Parallel`] wrapper around an existing composition.
244    #[must_use]
245    pub const fn new(inner: P) -> Self {
246        Self(inner)
247    }
248
249    /// Consume the wrapper and return the inner composition.
250    #[must_use]
251    pub fn into_inner(self) -> P {
252        self.0
253    }
254}
255
256impl<X0: Copy, X1: Copy, Y0, Y1, C0, C1, S0, S1> SplitProcess<(X0, X1), (Y0, Y1), (S0, S1)>
257    for Parallel<(C0, C1)>
258where
259    C0: SplitProcess<X0, Y0, S0>,
260    C1: SplitProcess<X1, Y1, S1>,
261{
262    fn process(&self, state: &mut (S0, S1), x: (X0, X1)) -> (Y0, Y1) {
263        (
264            self.0.0.process(&mut state.0, x.0),
265            self.0.1.process(&mut state.1, x.1),
266        )
267    }
268}
269
270impl<X: Copy, Y, C0, C1, S0, S1> SplitProcess<[X; 2], [Y; 2], (S0, S1)> for Parallel<(C0, C1)>
271where
272    C0: SplitProcess<X, Y, S0>,
273    C1: SplitProcess<X, Y, S1>,
274{
275    fn process(&self, state: &mut (S0, S1), x: [X; 2]) -> [Y; 2] {
276        [
277            self.0.0.process(&mut state.0, x[0]),
278            self.0.1.process(&mut state.1, x[1]),
279        ]
280    }
281}
282
283impl<X: Copy, Y, C, S, const N: usize> SplitProcess<[X; N], [Y; N], [S; N]> for Parallel<[C; N]>
284where
285    C: SplitProcess<X, Y, S>,
286{
287    fn process(&self, state: &mut [S; N], x: [X; N]) -> [Y; N] {
288        // `poor-codegen-from-fn-iter-next`: keep this as direct indexed construction.
289        core::array::from_fn(|i| self.0[i].process(&mut state[i], x[i]))
290    }
291}
292
293impl<X, C, S> SplitInplace<X, S> for Parallel<C>
294where
295    X: Copy,
296    Self: SplitProcess<X, X, S>,
297{
298}
299
//////////// BY LANE ////////////
301
/// Parallel composition whose typed views are interpreted lane-major.
///
/// For scalar `process()` this behaves like [`Parallel`]: lane `i` goes to
/// branch `i`. The distinction shows up in view processing, where under
/// [`View<_, _, LaneMajor, _>`] every branch is handed one long contiguous
/// lane slice instead of strided frame-major data.
///
/// Reach for this when branches correspond to lanes and view locality
/// matters, for instance with SIMD-friendly per-lane kernels or branches that
/// have a worthwhile slice-processing specialization. It is not a semantic
/// transpose: only the interpretation of typed views changes, and nothing is
/// allocated or physically moved.
#[derive(Debug, Default, Clone, Copy)]
pub struct ByLane<C>(C);
316
317impl<C> ByLane<C> {
318    /// Create a [`ByLane`] wrapper around an existing composition.
319    #[must_use]
320    pub const fn new(inner: C) -> Self {
321        Self(inner)
322    }
323
324    /// Consume the wrapper and return the inner composition.
325    #[must_use]
326    pub fn into_inner(self) -> C {
327        self.0
328    }
329}
330
331impl<X: Copy, Y, C0, C1, S0, S1> SplitProcess<[X; 2], [Y; 2], (S0, S1)> for ByLane<(C0, C1)>
332where
333    C0: SplitProcess<X, Y, S0>,
334    C1: SplitProcess<X, Y, S1>,
335{
336    fn process(&self, state: &mut (S0, S1), x: [X; 2]) -> [Y; 2] {
337        [
338            self.0.0.process(&mut state.0, x[0]),
339            self.0.1.process(&mut state.1, x[1]),
340        ]
341    }
342}
343
344impl<'a, 'b, X: Copy, Y, C0, C1, S0, S1>
345    SplitViewProcess<View<'a, X, LaneMajor, 2>, ViewMut<'b, Y, LaneMajor, 2>, (S0, S1)>
346    for ByLane<(C0, C1)>
347where
348    C0: SplitProcess<X, Y, S0>,
349    C1: SplitProcess<X, Y, S1>,
350{
351    fn process_view(
352        &self,
353        state: &mut (S0, S1),
354        x: View<'a, X, LaneMajor, 2>,
355        mut y: ViewMut<'b, Y, LaneMajor, 2>,
356    ) {
357        debug_assert_eq!(x.frames(), y.frames());
358        self.0.0.block(&mut state.0, x.lane(0), y.lane_mut(0));
359        self.0.1.block(&mut state.1, x.lane(1), y.lane_mut(1));
360    }
361}
362
363impl<X: Copy, Y, C, S, const N: usize> SplitProcess<[X; N], [Y; N], [S; N]> for ByLane<[C; N]>
364where
365    C: SplitProcess<X, Y, S>,
366{
367    fn process(&self, state: &mut [S; N], x: [X; N]) -> [Y; N] {
368        // `poor-codegen-from-fn-iter-next`: keep this as direct indexed construction.
369        core::array::from_fn(|i| self.0[i].process(&mut state[i], x[i]))
370    }
371}
372
373impl<'a, 'b, X: Copy, Y, C, S, const N: usize>
374    SplitViewProcess<View<'a, X, LaneMajor, N>, ViewMut<'b, Y, LaneMajor, N>, [S; N]>
375    for ByLane<[C; N]>
376where
377    C: SplitProcess<X, Y, S>,
378{
379    fn process_view(
380        &self,
381        state: &mut [S; N],
382        x: View<'a, X, LaneMajor, N>,
383        mut y: ViewMut<'b, Y, LaneMajor, N>,
384    ) {
385        debug_assert_eq!(x.frames(), y.frames());
386        for ((c, s), i) in self.0.iter().zip(state.iter_mut()).zip(0..) {
387            c.block(s, x.lane(i), y.lane_mut(i))
388        }
389    }
390}
391
392impl<X, C, S> SplitInplace<X, S> for ByLane<C>
393where
394    X: Copy,
395    Self: SplitProcess<X, X, S>,
396{
397}
398
399impl<'a, X: Copy, C0, C1, S0, S1> SplitViewInplace<ViewMut<'a, X, LaneMajor, 2>, (S0, S1)>
400    for ByLane<(C0, C1)>
401where
402    C0: SplitInplace<X, S0>,
403    C1: SplitInplace<X, S1>,
404{
405    fn inplace_view(&self, state: &mut (S0, S1), mut xy: ViewMut<'a, X, LaneMajor, 2>) {
406        self.0.0.inplace(&mut state.0, xy.lane_mut(0));
407        self.0.1.inplace(&mut state.1, xy.lane_mut(1));
408    }
409}
410
411impl<'a, X: Copy, C, S, const N: usize> SplitViewInplace<ViewMut<'a, X, LaneMajor, N>, [S; N]>
412    for ByLane<[C; N]>
413where
414    C: SplitInplace<X, S>,
415{
416    fn inplace_view(&self, state: &mut [S; N], mut xy: ViewMut<'a, X, LaneMajor, N>) {
417        for ((c, s), i) in self.0.iter().zip(state.iter_mut()).zip(0..) {
418            c.inplace(s, xy.lane_mut(i));
419        }
420    }
421}
422
//////////// LANES ////////////
424
/// One shared configuration applied to several lanes, each with its own state.
///
/// This is the primary motivation for the split-state API: the immutable
/// configuration exists once, and every lane owns a separate mutable state.
/// Scalar processing works lane-wise on `[X; N]`.
///
/// For view processing, combine it with [`View<_, _, LaneMajor, _>`] when the
/// data is naturally laid out lane-major and the inner stage profits from long
/// contiguous per-lane slices. When every lane should run exactly the same
/// configuration, prefer this over `repeat()`.
///
/// # Examples
///
/// ```rust
/// use dsp_process::{LaneMajor, Offset, Split, View, ViewMut, ViewProcess};
///
/// let mut p = Split::stateless(Offset(3)).lanes::<2>();
/// let x = View::<_, LaneMajor, 2>::from_flat(&[1, 2, 3, 10, 20, 30], 3);
/// let mut y = [0; 6];
/// let yv = ViewMut::<_, LaneMajor, 2>::from_flat(&mut y, 3);
/// ViewProcess::process_view(&mut p, x, yv);
/// assert_eq!(y, [4, 5, 6, 13, 23, 33]);
/// ```
#[derive(Debug, Default, Clone, Copy)]
pub struct Lanes<C>(C);
450
451impl<C> Lanes<C> {
452    /// Create a [`Lanes`] wrapper around an existing composition.
453    #[must_use]
454    pub const fn new(inner: C) -> Self {
455        Self(inner)
456    }
457
458    /// Consume the wrapper and return the inner composition.
459    #[must_use]
460    pub fn into_inner(self) -> C {
461        self.0
462    }
463}
464
465/// Process data from multiple lanes with a common configuration.
466///
467/// For layout-sensitive view processing use [`View<_, _, LaneMajor, _>`].
468impl<X: Copy, Y, C, S, const N: usize> SplitProcess<[X; N], [Y; N], [S; N]> for Lanes<C>
469where
470    C: SplitProcess<X, Y, S>,
471{
472    fn process(&self, state: &mut [S; N], x: [X; N]) -> [Y; N] {
473        // `poor-codegen-from-fn-iter-next`: keep this as direct indexed construction.
474        core::array::from_fn(|i| self.0.process(&mut state[i], x[i]))
475    }
476}
477
478impl<'a, 'b, X: Copy, Y, C, S, const N: usize>
479    SplitViewProcess<View<'a, X, LaneMajor, N>, ViewMut<'b, Y, LaneMajor, N>, [S; N]> for Lanes<C>
480where
481    C: SplitProcess<X, Y, S>,
482{
483    fn process_view(
484        &self,
485        state: &mut [S; N],
486        x: View<'a, X, LaneMajor, N>,
487        mut y: ViewMut<'b, Y, LaneMajor, N>,
488    ) {
489        debug_assert_eq!(x.frames(), y.frames());
490        for (state, i) in state.iter_mut().zip(0..) {
491            self.0.block(state, x.lane(i), y.lane_mut(i))
492        }
493    }
494}
495
496impl<X, C, S> SplitInplace<X, S> for Lanes<C>
497where
498    X: Copy,
499    Self: SplitProcess<X, X, S>,
500{
501}
502
503impl<'a, X: Copy, C, S, const N: usize> SplitViewInplace<ViewMut<'a, X, LaneMajor, N>, [S; N]>
504    for Lanes<C>
505where
506    C: SplitInplace<X, S>,
507{
508    fn inplace_view(&self, state: &mut [S; N], mut xy: ViewMut<'a, X, LaneMajor, N>) {
509        for (state, i) in state.iter_mut().zip(0..) {
510            self.0.inplace(state, xy.lane_mut(i));
511        }
512    }
513}
514
//////////// SPLIT MAJOR ////////////
516
/// Serial composition that runs stage by stage over chunks, using an explicit
/// scratch buffer for the intermediate data.
///
/// Scalar `process()` is unchanged. `block()` and `inplace()` instead walk the
/// pipeline chunk-wise through an intermediate buffer, so each stage is handed
/// a contiguous scratch slice before the following stage runs.
///
/// Good fits:
/// - stages with worthwhile `block()` implementations that should see long
///   slices
/// - pipelines where stage-major traversal beats sample-by-sample traversal
///   for cache behavior
/// - an intermediate type change that rules out plain in-place composition
/// - keeping SIMD/autovectorization opportunities across slice stages
///
/// Poor fits:
/// - tiny stages for which re-entering them per scratch chunk costs more than
///   it saves
/// - intermediate buffers that would be large or awkward to materialize
/// - cases already served by [`Minor`] because the hot working set is tiny and
///   per-sample stage locality dominates
///
/// Summary: `Major` keeps stage-wise slice processing and pays for it with
/// explicit scratch storage.
#[derive(Clone, Copy, Debug, Default)]
pub struct Major<P: ?Sized, U> {
    /// Marker for the intermediate buffer type; carries no data.
    _buf: PhantomData<U>,
    /// The wrapped stage configurations.
    inner: P,
}
546impl<P, U> Major<P, U> {
547    /// Create a [`Major`] wrapper around an existing composition.
548    #[must_use]
549    pub const fn new(inner: P) -> Self {
550        Self {
551            inner,
552            _buf: PhantomData,
553        }
554    }
555
556    /// Consume the wrapper and return the inner composition.
557    #[must_use]
558    pub fn into_inner(self) -> P {
559        self.inner
560    }
561
562    /// Borrow the wrapped composition.
563    #[must_use]
564    pub fn inner(&self) -> &P {
565        &self.inner
566    }
567}
568
569impl<X: Copy, U: Copy + Default, Y, C0, C1, S0, S1, const N: usize> SplitProcess<X, Y, (S0, S1)>
570    for Major<(C0, C1), [U; N]>
571where
572    C0: SplitProcess<X, U, S0>,
573    C1: SplitProcess<U, Y, S1>,
574{
575    fn process(&self, state: &mut (S0, S1), x: X) -> Y {
576        self.inner
577            .1
578            .process(&mut state.1, self.inner.0.process(&mut state.0, x))
579    }
580
581    fn block(&self, state: &mut (S0, S1), x: &[X], y: &mut [Y]) {
582        debug_assert_eq!(x.len(), y.len());
583        let mut u = [U::default(); N];
584        let (x, xr) = x.as_chunks::<N>();
585        let (y, yr) = y.as_chunks_mut::<N>();
586        for (x, y) in x.iter().zip(y) {
587            self.inner.0.block(&mut state.0, x, &mut u);
588            self.inner.1.block(&mut state.1, &u, y);
589        }
590        let ur = &mut u[..xr.len()];
591        self.inner.0.block(&mut state.0, xr, ur);
592        self.inner.1.block(&mut state.1, ur, yr);
593    }
594}
595
596impl<X: Copy, U: Copy + Default, C0, C1, S0, S1, const N: usize> SplitInplace<X, (S0, S1)>
597    for Major<(C0, C1), [U; N]>
598where
599    C0: SplitProcess<X, U, S0>,
600    C1: SplitProcess<U, X, S1>,
601{
602    fn inplace(&self, state: &mut (S0, S1), xy: &mut [X]) {
603        let mut u = [U::default(); N];
604        let (xy, xyr) = xy.as_chunks_mut::<N>();
605        for xy in xy {
606            self.inner.0.block(&mut state.0, xy, &mut u);
607            self.inner.1.block(&mut state.1, &u, xy);
608        }
609        let ur = &mut u[..xyr.len()];
610        self.inner.0.block(&mut state.0, xyr, ur);
611        self.inner.1.block(&mut state.1, ur, xyr);
612    }
613}