Skip to main content

ndwt/
iter.rs

1//! N-dimensional lane iteration over flat slices and ndarray arrays.
2//!
3//! A *lane* is a 1-D slice through an N-dimensional array along a single axis —
4//! equivalent to a row, column, or fibre depending on which axis is chosen.
5//! These iterators are used internally by the wavelet drivers to apply 1-D
6//! transforms independently along each axis of a multi-dimensional array.
7
8pub mod chunk_strided_slice;
9pub mod strided_slice;
10#[cfg(feature = "ndarray")]
11use ndarray::{ArrayRef, Dimension};
12use num_traits::Zero;
13use std::marker::PhantomData;
14use std::ops::ControlFlow;
15use std::ptr::NonNull;
16
17use crate::{ChunkWidth, utils::stride_from_shape};
18use chunk_strided_slice::{IterLaneChunks, IterLaneChunksMut};
19use strided_slice::{IterLanes, IterLanesMut};
20
21pub use chunk_strided_slice::ChunkStridedSliceRef;
22pub use strided_slice::StridedSliceRef;
23
/// Converts a row-major flat index into a multi-dimensional index for `shape`.
///
/// The last axis varies fastest. `flat_index` may be equal to the total number of
/// elements (`shape.iter().product()`); in that case the returned index is the
/// "one past the end" position `(n0 - 1, n1 - 1, ..., n_last)`, which must be
/// retreated by one step before it refers to a valid element.
///
/// Axes of length zero are tolerated: the "one past the end" position clamps their
/// `n - 1` component to `0` instead of underflowing.
///
/// # Panics
///
/// Panics if `flat_index` is greater than `shape.iter().product()`.
#[inline]
#[track_caller]
pub(crate) fn unravel(flat_index: usize, shape: &[usize]) -> Vec<usize> {
    let n_max: usize = shape.iter().product();
    assert!(
        flat_index <= n_max,
        "Flat index is beyond the end of the array."
    );

    // Special case for flat_index == n_max: return an index that points one past the
    // last item, i.e. (n0 - 1, n1 - 1, ..., n_last).
    if flat_index == n_max {
        // `saturating_sub` keeps a zero-length axis at 0 rather than wrapping to usize::MAX.
        let mut inds: Vec<usize> = shape.iter().map(|n| n.saturating_sub(1)).collect();
        if let (Some(last), Some(&n_last)) = (inds.last_mut(), shape.last()) {
            *last = n_last;
        }
        return inds;
    }

    // Here flat_index < n_max, so n_max > 0 and every axis length is non-zero:
    // the divisions below cannot divide by zero.
    let mut inds = vec![0; shape.len()];
    let mut remaining = flat_index;
    for (i_dir, &n_dir) in inds.iter_mut().zip(shape.iter()).rev() {
        *i_dir = remaining % n_dir;
        remaining /= n_dir;
    }
    inds
}
57
/// Marker trait for readable strided data containers.
///
/// The trait declares no methods; it only names the element type through
/// [`Data::Elem`].
///
/// # Safety
/// Implementors must guarantee that the underlying memory is valid for reads for the
/// declared element type.
pub unsafe trait Data: Sized {
    /// The element type stored in the container.
    type Elem;
}
67
/// Marker trait for writable strided data containers.
///
/// Extends [`Data`] without adding any items of its own.
///
/// # Safety
/// Implementors must guarantee that the underlying memory is valid for reads and writes
/// for the declared element type, and that no other reference aliases the same memory.
pub unsafe trait DataMut: Data {}
74
/// Phantom type used to attach lifetime information to strided slice views.
///
/// The struct holds only a [`PhantomData`], so it stores no value of `T`.
pub struct SliceLifetime<T> {
    // Zero-sized marker carrying the type parameter (e.g. `&T` or `&mut T`).
    _member: PhantomData<T>,
}
79
// Shared-reference marker: readable data with element type `T`.
// NOTE(review): the `Data` safety contract (memory valid for reads) has to be upheld by
// whatever view type pairs this marker with a pointer — confirm at the use sites.
unsafe impl<T> Data for SliceLifetime<&T> {
    type Elem = T;
}

// Mutable-reference marker: also readable, element type `T`.
unsafe impl<T> Data for SliceLifetime<&mut T> {
    type Elem = T;
}

// Only the `&mut T` marker is writable; `SliceLifetime<&T>` deliberately has no
// `DataMut` impl.
unsafe impl<T> DataMut for SliceLifetime<&mut T> {}
89
/// Shape/stride description of the lanes along one axis of an N-dimensional array.
///
/// As built by `ArrayInfo::new`, the lane axis is split off from the remaining axes.
#[derive(Clone, Debug)]
pub(crate) struct ArrayInfo {
    // Extents of every axis except the lane axis.
    shape: Vec<usize>,
    // Strides (in elements) of every axis except the lane axis; same order as `shape`.
    stride: Vec<isize>,
    // Extent of the lane axis, i.e. the number of elements per lane.
    lane_length: usize,
    // Stride of the lane axis, i.e. the step between consecutive elements of a lane.
    lane_stride: isize,
}
97
98impl ArrayInfo {
99    #[track_caller]
100    fn new(shape: &[usize], stride: &[isize], axis: usize) -> Self {
101        assert!(
102            axis < shape.len(),
103            "Specified axis exceeds shape dimensions"
104        );
105        assert_eq!(
106            stride.len(),
107            shape.len(),
108            "Shape and stride should have the same length."
109        );
110        let mut stride = stride.to_owned();
111        let mut shape = shape.to_owned();
112
113        let lane_length = shape.remove(axis);
114        let lane_stride = stride.remove(axis);
115
116        Self {
117            shape,
118            stride,
119            lane_length,
120            lane_stride,
121        }
122    }
123    #[inline(always)]
124    fn n_lanes(&self) -> usize {
125        self.shape.iter().product()
126    }
127
128    #[inline(always)]
129    fn get_position_at(&self, i: usize) -> Vec<usize> {
130        unravel(i, &self.shape)
131    }
132
133    #[inline(always)]
134    fn get_offset_at(&self, pos: &[usize]) -> isize {
135        pos.iter()
136            .zip(self.stride.iter())
137            .fold(0, |acc, (i, step)| acc + *i as isize * step)
138    }
139
140    #[inline(always)]
141    fn advance_position_and_offset(&self, pos: &mut [usize], offset: &mut isize) {
142        let _ = self
143            .stride
144            .iter()
145            .zip(self.shape.iter())
146            .zip(pos)
147            .rev()
148            .try_for_each(|((str, shp), pos)| {
149                *offset += *str;
150                *pos += 1;
151                if *pos < *shp {
152                    return ControlFlow::Break(());
153                };
154                *pos = 0;
155                *offset -= *shp as isize * str;
156                ControlFlow::Continue(())
157            });
158    }
159
160    #[inline(always)]
161    fn retreat_position_and_offset(&self, pos: &mut [usize], offset: &mut isize) {
162        let _ = self
163            .stride
164            .iter()
165            .zip(self.shape.iter())
166            .zip(pos)
167            .rev()
168            .try_for_each(|((str, shp), pos)| {
169                if *pos == 0 {
170                    *pos = *shp - 1;
171                    *offset += *pos as isize * str;
172                    ControlFlow::Continue(())
173                } else {
174                    *pos -= 1;
175                    *offset -= *str;
176                    ControlFlow::Break(())
177                }
178            });
179    }
180}
181
/// Builds the base pointer and [`ArrayInfo`] for lanes along `axis` of a whole flat slice.
///
/// Forwards to [`lane_parts_from_sub_slice`], passing `shape` as both the full shape and
/// the sub-shape, so the same panic conditions apply.
#[track_caller]
fn lane_parts_from_slice<T>(arr: &[T], shape: &[usize], axis: usize) -> (NonNull<T>, ArrayInfo) {
    lane_parts_from_sub_slice(arr, shape, shape, axis)
}
186
187#[track_caller]
188fn lane_parts_from_sub_slice<T>(
189    arr: &[T],
190    shape: &[usize],
191    sub_shape: &[usize],
192    axis: usize,
193) -> (NonNull<T>, ArrayInfo) {
194    let n = arr.len();
195    assert!(
196        !arr.is_empty(),
197        "Attempted to create a lane iterator from an empty slice."
198    );
199    let n_items: usize = shape.iter().product();
200    assert_eq!(
201        n, n_items,
202        "array length must be consistent with the shape. Shape suggests {n_items}, but slice had {n} items."
203    );
204    assert_eq!(
205        shape.len(),
206        sub_shape.len(),
207        "shape length, {}, and sub_shape length, {}, must be equal",
208        shape.len(),
209        sub_shape.len()
210    );
211    assert!(
212        sub_shape.iter().zip(shape.iter()).all(|(n1, n2)| n1 <= n2),
213        "sub_shape: {:?}, must be equal to our smaller than shape, {:?}",
214        sub_shape,
215        shape,
216    );
217    assert!(
218        axis < shape.len(),
219        "axis: {axis} is out of bounds for dimension size of {}",
220        shape.len()
221    );
222
223    let stride = stride_from_shape(shape)
224        .into_iter()
225        .map(|s| s as isize)
226        .collect::<Vec<_>>();
227    // SAFETY: slice length > 0 so ptr is NonNull.
228    let ptr = unsafe { NonNull::new_unchecked(arr.as_ptr() as *mut T) };
229    (ptr, ArrayInfo::new(sub_shape, &stride, axis))
230}
231
/// Builds the base pointer and [`ArrayInfo`] for lanes along `axis` of an ndarray.
///
/// The strides are taken from `arr.strides()`, while the extents stored in the returned
/// [`ArrayInfo`] come from `sub_shape`.
///
/// # Panics
///
/// Panics if `arr` is empty, if `axis >= arr.ndim()`, if `sub_shape.len() != arr.ndim()`,
/// or if any `sub_shape[i] > arr.shape()[i]`.
#[cfg(feature = "ndarray")]
#[track_caller]
fn lane_parts_from_ndarray<T, D: Dimension>(
    arr: &ArrayRef<T, D>,
    sub_shape: &[usize],
    axis: usize,
) -> (NonNull<T>, ArrayInfo) {
    assert_ne!(
        arr.len(),
        0,
        "Cannot create a lane iterator from an empty ndarray."
    );
    let ndim = arr.ndim();
    assert!(
        axis < ndim,
        "axis: {axis} is out of bounds for dimension size of {ndim}",
    );
    assert_eq!(
        sub_shape.len(),
        arr.ndim(),
        "shape.len(), {}, is not equal to arr.ndim(), {ndim}",
        sub_shape.len(),
    );

    // Every requested extent must fit inside the array's own extent on that axis.
    assert!(
        sub_shape.iter().zip(arr.shape()).all(|(n, m)| n <= m),
        "requested shape, {:?} must all be <= arr.shape(), {:?}.",
        sub_shape,
        arr.shape(),
    );

    // SAFETY: Array is not empty, so pointer to first element is guaranteed non-null.
    let ptr = unsafe { NonNull::new_unchecked(arr.as_ptr() as *mut T) };
    (ptr, ArrayInfo::new(sub_shape, arr.strides(), axis))
}
267
/// Iterate over 1-D lanes of an N-dimensional array along a chosen axis.
///
/// A *lane* is a 1-D slice along one axis while all other indices are held
/// fixed.  Implementations are provided for `[T]` (flat row-major slices)
/// and, when the `ndarray` feature is enabled, for `ndarray::ArrayRef<T, D>`.
pub trait LanesIterator {
    /// Element type stored in the array.
    type Item;

    /// Iterate over all lanes along `axis` of an array with the given `shape`.
    ///
    /// # Panics
    ///
    /// Panics if `axis >= shape.len()`, if the slice is empty, or if the slice length
    /// does not equal `shape.iter().product()`.
    fn iter_lanes<'a>(&'a self, shape: &[usize], axis: usize) -> IterLanes<'a, Self::Item>;

    /// Mutably iterate over all lanes along `axis`.
    ///
    /// # Panics
    ///
    /// Same conditions as [`iter_lanes`](Self::iter_lanes).
    fn iter_lanes_mut<'a>(
        &'a mut self,
        shape: &[usize],
        axis: usize,
    ) -> IterLanesMut<'a, Self::Item>;

    /// Iterate over fixed-width chunks of lanes along `axis`.
    ///
    /// Each item is a group of `N` consecutive elements within a lane.
    ///
    /// Returns a pair: the chunk iterator and a plain lane iterator.
    /// NOTE(review): the second iterator presumably covers whatever is left over once the
    /// lanes have been grouped into width-`N` chunks — confirm in `chunk_strided_slice`.
    ///
    /// # Panics
    ///
    /// Same conditions as [`iter_lanes`](Self::iter_lanes).
    fn iter_lane_chunks<'a, const N: usize>(
        &'a self,
        shape: &[usize],
        axis: usize,
    ) -> (IterLaneChunks<'a, Self::Item, N>, IterLanes<'a, Self::Item>);

    /// Mutably iterate over fixed-width chunks of lanes along `axis`.
    ///
    /// Returns the same kind of pair as [`iter_lane_chunks`](Self::iter_lane_chunks),
    /// with mutable iterators.
    ///
    /// # Panics
    ///
    /// Same conditions as [`iter_lanes`](Self::iter_lanes).
    fn iter_lane_chunks_mut<'a, const N: usize>(
        &'a mut self,
        shape: &[usize],
        axis: usize,
    ) -> (
        IterLaneChunksMut<'a, Self::Item, N>,
        IterLanesMut<'a, Self::Item>,
    );

    /// Iterate over lanes of a sub-region defined by `sub_shape`.
    ///
    /// Lanes are taken from the first `sub_shape[axis]` elements along `axis`,
    /// with the outer shape given by `shape`.
    ///
    /// # Panics
    ///
    /// Panics if `axis >= shape.len()`, if `shape.len() != sub_shape.len()`, if any
    /// `sub_shape[i] > shape[i]`, if the slice is empty, or if the slice length does
    /// not equal `shape.iter().product()`.
    fn iter_lanes_sub<'a>(
        &'a self,
        shape: &[usize],
        sub_shape: &[usize],
        axis: usize,
    ) -> IterLanes<'a, Self::Item>;

    /// Mutably iterate over lanes of a sub-region.
    ///
    /// # Panics
    ///
    /// Same conditions as [`iter_lanes_sub`](Self::iter_lanes_sub).
    fn iter_lanes_sub_mut<'a>(
        &'a mut self,
        shape: &[usize],
        sub_shape: &[usize],
        axis: usize,
    ) -> IterLanesMut<'a, Self::Item>;

    /// Iterate over fixed-width chunks of lanes within a sub-region.
    ///
    /// Returns the same kind of pair as [`iter_lane_chunks`](Self::iter_lane_chunks).
    ///
    /// # Panics
    ///
    /// Same conditions as [`iter_lanes_sub`](Self::iter_lanes_sub).
    fn iter_lane_chunks_sub<'a, const N: usize>(
        &'a self,
        shape: &[usize],
        sub_shape: &[usize],
        axis: usize,
    ) -> (IterLaneChunks<'a, Self::Item, N>, IterLanes<'a, Self::Item>);

    /// Mutably iterate over fixed-width chunks of lanes within a sub-region.
    ///
    /// Returns the same kind of pair as [`iter_lane_chunks_mut`](Self::iter_lane_chunks_mut).
    ///
    /// # Panics
    ///
    /// Same conditions as [`iter_lanes_sub`](Self::iter_lanes_sub).
    fn iter_lane_chunks_sub_mut<'a, const N: usize>(
        &'a mut self,
        shape: &[usize],
        sub_shape: &[usize],
        axis: usize,
    ) -> (
        IterLaneChunksMut<'a, Self::Item, N>,
        IterLanesMut<'a, Self::Item>,
    );

    /// Return the axis index with the smallest stride (most cache-friendly to
    /// iterate over for the given `shape`).
    fn min_stride_axis(&self, shape: &[usize]) -> usize;

    /// Return whether a lane along the requested axis `ax` will be contiguous.
    fn is_ax_contiguous(&self, ax: usize, shape: &[usize]) -> bool;
}
386
387impl<T> LanesIterator for [T] {
388    type Item = T;
389    #[track_caller]
390    fn iter_lanes<'a>(&'a self, shape: &[usize], axis: usize) -> IterLanes<'a, Self::Item> {
391        IterLanes::from_slice(self, shape, axis)
392    }
393    #[track_caller]
394    fn iter_lanes_mut<'a>(
395        &'a mut self,
396        shape: &[usize],
397        axis: usize,
398    ) -> IterLanesMut<'a, Self::Item> {
399        IterLanesMut::from_slice(self, shape, axis)
400    }
401
402    #[track_caller]
403    fn iter_lane_chunks<'a, const N: usize>(
404        &'a self,
405        shape: &[usize],
406        axis: usize,
407    ) -> (IterLaneChunks<'a, Self::Item, N>, IterLanes<'a, Self::Item>) {
408        IterLaneChunks::from_slice(self, shape, axis)
409    }
410
411    #[track_caller]
412    fn iter_lane_chunks_mut<'a, const N: usize>(
413        &'a mut self,
414        shape: &[usize],
415        axis: usize,
416    ) -> (
417        IterLaneChunksMut<'a, Self::Item, N>,
418        IterLanesMut<'a, Self::Item>,
419    ) {
420        IterLaneChunksMut::from_slice(self, shape, axis)
421    }
422
423    #[track_caller]
424    fn iter_lanes_sub<'a>(
425        &'a self,
426        shape: &[usize],
427        sub_shape: &[usize],
428        axis: usize,
429    ) -> IterLanes<'a, Self::Item> {
430        IterLanes::from_sub_slice(self, shape, sub_shape, axis)
431    }
432    #[track_caller]
433    fn iter_lanes_sub_mut<'a>(
434        &'a mut self,
435        shape: &[usize],
436        sub_shape: &[usize],
437        axis: usize,
438    ) -> IterLanesMut<'a, Self::Item> {
439        IterLanesMut::from_sub_slice(self, shape, sub_shape, axis)
440    }
441
442    #[track_caller]
443    fn iter_lane_chunks_sub<'a, const N: usize>(
444        &'a self,
445        shape: &[usize],
446        sub_shape: &[usize],
447        axis: usize,
448    ) -> (IterLaneChunks<'a, Self::Item, N>, IterLanes<'a, Self::Item>) {
449        IterLaneChunks::from_sub_slice(self, shape, sub_shape, axis)
450    }
451
452    #[track_caller]
453    fn iter_lane_chunks_sub_mut<'a, const N: usize>(
454        &'a mut self,
455        shape: &[usize],
456        sub_shape: &[usize],
457        axis: usize,
458    ) -> (
459        IterLaneChunksMut<'a, Self::Item, N>,
460        IterLanesMut<'a, Self::Item>,
461    ) {
462        IterLaneChunksMut::from_sub_slice(self, shape, sub_shape, axis)
463    }
464
465    fn min_stride_axis(&self, shape: &[usize]) -> usize {
466        if !shape.is_empty() {
467            shape.len() - 1
468        } else {
469            0
470        }
471    }
472
473    #[inline]
474    fn is_ax_contiguous(&self, ax: usize, shape: &[usize]) -> bool {
475        ax + 1 == shape.len()
476    }
477}
478
/// Lane iteration over an ndarray.
///
/// Every iterator constructor forwards to the matching `from_ndarray` constructor. The
/// `*_sub` variants ignore their `_shape` argument and forward only `sub_shape`.
#[cfg(feature = "ndarray")]
impl<T, D: ::ndarray::Dimension> LanesIterator for ArrayRef<T, D> {
    type Item = T;

    #[track_caller]
    fn iter_lanes<'a>(&'a self, shape: &[usize], axis: usize) -> IterLanes<'a, Self::Item> {
        IterLanes::from_ndarray(self, shape, axis)
    }

    #[track_caller]
    fn iter_lanes_mut<'a>(
        &'a mut self,
        shape: &[usize],
        axis: usize,
    ) -> IterLanesMut<'a, Self::Item> {
        IterLanesMut::from_ndarray(self, shape, axis)
    }

    #[track_caller]
    fn iter_lane_chunks<'a, const N: usize>(
        &'a self,
        shape: &[usize],
        axis: usize,
    ) -> (IterLaneChunks<'a, Self::Item, N>, IterLanes<'a, Self::Item>) {
        IterLaneChunks::from_ndarray(self, shape, axis)
    }

    #[track_caller]
    fn iter_lane_chunks_mut<'a, const N: usize>(
        &'a mut self,
        shape: &[usize],
        axis: usize,
    ) -> (
        IterLaneChunksMut<'a, Self::Item, N>,
        IterLanesMut<'a, Self::Item>,
    ) {
        IterLaneChunksMut::from_ndarray(self, shape, axis)
    }

    #[track_caller]
    fn iter_lanes_sub<'a>(
        &'a self,
        _shape: &[usize],
        sub_shape: &[usize],
        axis: usize,
    ) -> IterLanes<'a, Self::Item> {
        IterLanes::from_ndarray(self, sub_shape, axis)
    }

    #[track_caller]
    fn iter_lanes_sub_mut<'a>(
        &'a mut self,
        _shape: &[usize],
        sub_shape: &[usize],
        axis: usize,
    ) -> IterLanesMut<'a, Self::Item> {
        IterLanesMut::from_ndarray(self, sub_shape, axis)
    }

    #[track_caller]
    fn iter_lane_chunks_sub<'a, const N: usize>(
        &'a self,
        _shape: &[usize],
        sub_shape: &[usize],
        axis: usize,
    ) -> (IterLaneChunks<'a, Self::Item, N>, IterLanes<'a, Self::Item>) {
        IterLaneChunks::from_ndarray(self, sub_shape, axis)
    }

    #[track_caller]
    fn iter_lane_chunks_sub_mut<'a, const N: usize>(
        &'a mut self,
        _shape: &[usize],
        sub_shape: &[usize],
        axis: usize,
    ) -> (
        IterLaneChunksMut<'a, Self::Item, N>,
        IterLanesMut<'a, Self::Item>,
    ) {
        IterLaneChunksMut::from_ndarray(self, sub_shape, axis)
    }

    /// Index of the axis whose stride has the smallest absolute value.
    ///
    /// Ties resolve to the lowest axis index; an array with no axes yields `0`.
    fn min_stride_axis(&self, _shape: &[usize]) -> usize {
        self.strides()
            .iter()
            .copied()
            .enumerate()
            // `min_by_key` keeps the first of several equal minima.
            .min_by_key(|&(_, step)| step.abs())
            .map_or(0, |(ax, _)| ax)
    }

    /// `true` only when `ax` is a valid axis whose stride is exactly `1`.
    #[inline]
    fn is_ax_contiguous(&self, ax: usize, _shape: &[usize]) -> bool {
        self.strides().get(ax) == Some(&1)
    }
}
577
578pub(crate) fn copy_over<T, L, const N: usize>(
579    input: &L,
580    output: &mut L,
581    in_shape: &[usize],
582    out_shape: &[usize],
583) where
584    L: LanesIterator<Item = T> + ?Sized,
585    T: Clone + Zero + ChunkWidth<T, N>,
586{
587    // copy input into output
588    let min_axis = output.min_stride_axis(out_shape);
589    let (in_lanes, out_lanes) = if input.is_ax_contiguous(min_axis, in_shape)
590        || output.is_ax_contiguous(min_axis, out_shape)
591    {
592        (
593            input.iter_lanes_sub(in_shape, out_shape, min_axis),
594            output.iter_lanes_mut(out_shape, min_axis),
595        )
596    } else {
597        let (in_chunks, in_rem) = input.iter_lane_chunks_sub::<N>(in_shape, out_shape, min_axis);
598        let (out_chunks, out_rem) = output.iter_lane_chunks_mut::<N>(out_shape, min_axis);
599
600        out_chunks.zip(in_chunks).for_each(|(mut o, i)| {
601            o.iter_mut().zip(i.iter()).for_each(|(o, i)| {
602                o.into_iter()
603                    .zip(i.into_iter().cloned())
604                    .for_each(|(o, i)| {
605                        *o = i;
606                    });
607            });
608        });
609        (in_rem, out_rem)
610    };
611    out_lanes.zip(in_lanes).for_each(|(mut o, i)| {
612        o.iter_mut()
613            .zip(i.iter().cloned())
614            .for_each(|(o, i)| *o = i);
615    });
616}
617
618#[cfg(feature = "rayon")]
619/// Parallel lane iteration using Rayon.
620///
621/// Provides the same interface as [`LanesIterator`] but returns Rayon parallel
622/// iterators so that the caller can process independent lanes on multiple threads.
623pub mod parallel {
624    use super::chunk_strided_slice::parallel::{ParIterLaneChunks, ParIterLaneChunksMut};
625    use super::strided_slice::parallel::{ParIterLanes, ParIterLanesMut};
626    use super::*;
627
    /// Iterate over 1-D lanes of an N-dimensional array along a chosen axis in parallel.
    ///
    /// A *lane* is a 1-D slice along one axis while all other indices are held
    /// fixed.  Implementations are provided for `[T]` (flat row-major slices)
    /// and, when the `ndarray` feature is enabled, for `ndarray::ArrayRef<T, D>`.
    ///
    /// Each method mirrors the [`LanesIterator`] method of the same name without the
    /// `par_` prefix, returning the `Par*` iterator types instead.
    pub trait LanesParallelIterator: LanesIterator {
        /// Iterate over all lanes along `axis` of an array with the given `shape`.
        ///
        /// # Panics
        ///
        /// Panics if `axis >= shape.len()`, if the slice is empty, or if the slice length
        /// does not equal `shape.iter().product()`.
        fn par_iter_lanes<'a>(
            &'a self,
            shape: &[usize],
            axis: usize,
        ) -> ParIterLanes<'a, Self::Item>;

        /// Mutably iterate over all lanes along `axis`.
        ///
        /// # Panics
        ///
        /// Same conditions as [`par_iter_lanes`](Self::par_iter_lanes).
        fn par_iter_lanes_mut<'a>(
            &'a mut self,
            shape: &[usize],
            axis: usize,
        ) -> ParIterLanesMut<'a, Self::Item>;

        /// Iterate over SIMD-width chunks of lanes along `axis`.
        ///
        /// Each item is a group of `N` consecutive elements within a lane.
        ///
        /// Returns a pair: the parallel chunk iterator and a parallel lane iterator.
        /// NOTE(review): the second iterator presumably covers the lanes left over after
        /// chunking — confirm in `chunk_strided_slice::parallel`.
        ///
        /// # Panics
        ///
        /// Same conditions as [`par_iter_lanes`](Self::par_iter_lanes).
        fn par_iter_lane_chunks<'a, const N: usize>(
            &'a self,
            shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunks<'a, Self::Item, N>,
            ParIterLanes<'a, Self::Item>,
        );

        /// Mutably iterate over SIMD-width chunks of lanes along `axis`.
        ///
        /// # Panics
        ///
        /// Same conditions as [`par_iter_lanes`](Self::par_iter_lanes).
        fn par_iter_lane_chunks_mut<'a, const N: usize>(
            &'a mut self,
            shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunksMut<'a, Self::Item, N>,
            ParIterLanesMut<'a, Self::Item>,
        );

        /// Iterate over lanes of a sub-region defined by `sub_shape`.
        ///
        /// Lanes are taken from the first `sub_shape[axis]` elements along `axis`,
        /// with the outer shape given by `shape`.
        ///
        /// # Panics
        ///
        /// Panics if `axis >= shape.len()`, if `shape.len() != sub_shape.len()`, if any
        /// `sub_shape[i] > shape[i]`, if the slice is empty, or if the slice length does
        /// not equal `shape.iter().product()`.
        fn par_iter_lanes_sub<'a>(
            &'a self,
            shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> ParIterLanes<'a, Self::Item>;

        /// Mutably iterate over lanes of a sub-region.
        ///
        /// # Panics
        ///
        /// Same conditions as [`par_iter_lanes_sub`](Self::par_iter_lanes_sub).
        fn par_iter_lanes_sub_mut<'a>(
            &'a mut self,
            shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> ParIterLanesMut<'a, Self::Item>;

        /// Iterate over SIMD-width chunks of lanes within a sub-region.
        ///
        /// # Panics
        ///
        /// Same conditions as [`par_iter_lanes_sub`](Self::par_iter_lanes_sub).
        fn par_iter_lane_chunks_sub<'a, const N: usize>(
            &'a self,
            shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunks<'a, Self::Item, N>,
            ParIterLanes<'a, Self::Item>,
        );

        /// Mutably iterate over SIMD-width chunks of lanes within a sub-region.
        ///
        /// # Panics
        ///
        /// Same conditions as [`par_iter_lanes_sub`](Self::par_iter_lanes_sub).
        fn par_iter_lane_chunks_sub_mut<'a, const N: usize>(
            &'a mut self,
            shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunksMut<'a, Self::Item, N>,
            ParIterLanesMut<'a, Self::Item>,
        );
    }
746
    // Parallel lane iteration over a flat slice interpreted with a caller-supplied
    // `shape`. Every method forwards to the matching `from_slice` / `from_sub_slice`
    // constructor of the parallel iterator type.
    impl<T> LanesParallelIterator for [T] {
        #[track_caller]
        fn par_iter_lanes<'a>(
            &'a self,
            shape: &[usize],
            axis: usize,
        ) -> ParIterLanes<'a, Self::Item> {
            ParIterLanes::from_slice(self, shape, axis)
        }
        #[track_caller]
        fn par_iter_lanes_mut<'a>(
            &'a mut self,
            shape: &[usize],
            axis: usize,
        ) -> ParIterLanesMut<'a, Self::Item> {
            ParIterLanesMut::from_slice(self, shape, axis)
        }

        #[track_caller]
        fn par_iter_lane_chunks<'a, const N: usize>(
            &'a self,
            shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunks<'a, Self::Item, N>,
            ParIterLanes<'a, Self::Item>,
        ) {
            ParIterLaneChunks::from_slice(self, shape, axis)
        }

        #[track_caller]
        fn par_iter_lane_chunks_mut<'a, const N: usize>(
            &'a mut self,
            shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunksMut<'a, Self::Item, N>,
            ParIterLanesMut<'a, Self::Item>,
        ) {
            ParIterLaneChunksMut::from_slice(self, shape, axis)
        }

        // Sub-region variants: both the full `shape` and `sub_shape` are forwarded.
        #[track_caller]
        fn par_iter_lanes_sub<'a>(
            &'a self,
            shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> ParIterLanes<'a, Self::Item> {
            ParIterLanes::from_sub_slice(self, shape, sub_shape, axis)
        }
        #[track_caller]
        fn par_iter_lanes_sub_mut<'a>(
            &'a mut self,
            shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> ParIterLanesMut<'a, Self::Item> {
            ParIterLanesMut::from_sub_slice(self, shape, sub_shape, axis)
        }

        #[track_caller]
        fn par_iter_lane_chunks_sub<'a, const N: usize>(
            &'a self,
            shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunks<'a, Self::Item, N>,
            ParIterLanes<'a, Self::Item>,
        ) {
            ParIterLaneChunks::from_sub_slice(self, shape, sub_shape, axis)
        }

        #[track_caller]
        fn par_iter_lane_chunks_sub_mut<'a, const N: usize>(
            &'a mut self,
            shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunksMut<'a, Self::Item, N>,
            ParIterLanesMut<'a, Self::Item>,
        ) {
            ParIterLaneChunksMut::from_sub_slice(self, shape, sub_shape, axis)
        }
    }
834
    // Parallel lane iteration over an ndarray. Every method forwards to the matching
    // `from_ndarray` constructor of the parallel iterator type.
    #[cfg(feature = "ndarray")]
    impl<T, D: ::ndarray::Dimension> LanesParallelIterator for ArrayRef<T, D> {
        #[track_caller]
        fn par_iter_lanes<'a>(
            &'a self,
            shape: &[usize],
            axis: usize,
        ) -> ParIterLanes<'a, Self::Item> {
            ParIterLanes::from_ndarray(self, shape, axis)
        }
        #[track_caller]
        fn par_iter_lanes_mut<'a>(
            &'a mut self,
            shape: &[usize],
            axis: usize,
        ) -> ParIterLanesMut<'a, Self::Item> {
            ParIterLanesMut::from_ndarray(self, shape, axis)
        }

        #[track_caller]
        fn par_iter_lane_chunks<'a, const N: usize>(
            &'a self,
            shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunks<'a, Self::Item, N>,
            ParIterLanes<'a, Self::Item>,
        ) {
            ParIterLaneChunks::from_ndarray(self, shape, axis)
        }

        #[track_caller]
        fn par_iter_lane_chunks_mut<'a, const N: usize>(
            &'a mut self,
            shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunksMut<'a, Self::Item, N>,
            ParIterLanesMut<'a, Self::Item>,
        ) {
            ParIterLaneChunksMut::from_ndarray(self, shape, axis)
        }

        // Sub-region variants: `_shape` is ignored; only `sub_shape` is forwarded.
        #[track_caller]
        fn par_iter_lanes_sub<'a>(
            &'a self,
            _shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> ParIterLanes<'a, Self::Item> {
            ParIterLanes::from_ndarray(self, sub_shape, axis)
        }
        #[track_caller]
        fn par_iter_lanes_sub_mut<'a>(
            &'a mut self,
            _shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> ParIterLanesMut<'a, Self::Item> {
            ParIterLanesMut::from_ndarray(self, sub_shape, axis)
        }

        #[track_caller]
        fn par_iter_lane_chunks_sub<'a, const N: usize>(
            &'a self,
            _shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunks<'a, Self::Item, N>,
            ParIterLanes<'a, Self::Item>,
        ) {
            ParIterLaneChunks::from_ndarray(self, sub_shape, axis)
        }

        #[track_caller]
        fn par_iter_lane_chunks_sub_mut<'a, const N: usize>(
            &'a mut self,
            _shape: &[usize],
            sub_shape: &[usize],
            axis: usize,
        ) -> (
            ParIterLaneChunksMut<'a, Self::Item, N>,
            ParIterLanesMut<'a, Self::Item>,
        ) {
            ParIterLaneChunksMut::from_ndarray(self, sub_shape, axis)
        }
    }
923
    /// Copies the leading `out_shape` region of `input` into `output` using Rayon
    /// parallel iterators.
    ///
    /// Lanes are taken along `output`'s minimum-stride axis. The width-`N` chunk
    /// iterators are processed first, followed by the lane iterators returned alongside
    /// them.
    pub(crate) fn copy_over<T, L, const N: usize>(
        input: &L,
        output: &mut L,
        in_shape: &[usize],
        out_shape: &[usize],
    ) where
        L: LanesParallelIterator<Item = T> + ?Sized,
        T: Clone + Zero + ChunkWidth<T, N> + Send + Sync,
    {
        use rayon::iter::{IndexedParallelIterator, ParallelIterator};
        // copy input into output
        let min_axis = output.min_stride_axis(out_shape);
        let (in_chunks, in_rem) =
            input.par_iter_lane_chunks_sub::<N>(in_shape, out_shape, min_axis);
        let (out_chunks, out_rem) = output.par_iter_lane_chunks_mut::<N>(out_shape, min_axis);

        // Chunked part: pair each output chunk with its input chunk and copy
        // element by element.
        out_chunks.zip(in_chunks).for_each(|(mut o, i)| {
            o.iter_mut().zip(i.iter()).for_each(|(o, i)| {
                o.into_iter()
                    .zip(i.into_iter().cloned())
                    .for_each(|(o, i)| {
                        *o = i;
                    });
            });
        });
        // Lanes returned alongside the chunk iterators are copied one at a time.
        out_rem.zip(in_rem).for_each(|(mut o, i)| {
            o.iter_mut()
                .zip(i.iter().cloned())
                .for_each(|(o, i)| *o = i);
        });
    }
955}