Skip to main content

open_hypergraphs/array/
traits.rs

1//! The operations which an array type must support to implement open hypergraphs
2use core::fmt::Debug;
3use core::ops::{Add, Sub};
4use core::ops::{Bound, Range, RangeBounds};
5
6use num_traits::{One, Zero};
7
8/// Array *kinds*.
9/// For example, [`super::vec::VecKind`] is the set of types [`Vec<T>`] for all `T`.
10pub trait ArrayKind: Sized {
11    /// The type of arrays containing elements type T
12    type Type<T>;
13
14    /// The type of index *elements*. For [`super::vec::VecKind`], this is [`usize`].
15    type I: Clone
16        + PartialEq
17        + Ord
18        + Debug
19        + One
20        + Zero
21        + Add<Output = Self::I>
22        + Sub<Output = Self::I>
23        // NOTE: this last constraint that an index can add with rhs an Index array is a hack that
24        // lets us implement `tensor` for finite functions without unnecessary cloning.
25        + for<'a> Add<&'a Self::Index, Output = Self::Index>;
26
27    /// Arrays of indices (isomorphic to `Type<I>`) must implement NaturalArray
28    type Index: NaturalArray<Self>
29        + Into<Self::Type<Self::I>>
30        + From<Self::Type<Self::I>>
31        + AsRef<Self::Type<Self::I>>
32        + AsMut<Self::Type<Self::I>>
33        + PartialEq;
34
35    /// a `Slice` is a read-only view into another array's data.
36    /// For `VecKind` this is `&[T]`.
37    type Slice<'a, T: 'a>; // part of an array
38}
39
40/// Arrays of elements T for some [`ArrayKind`] `K`.
41///
42/// # Panics
43///
44/// Any operation using an index out of range for the given array will panic.
45pub trait Array<K: ArrayKind, T>: Clone {
46    /// The empty array
47    fn empty() -> Self;
48
49    /// Length of an array
50    fn len(&self) -> K::I;
51
52    /// Test if an array is empty
53    fn is_empty(&self) -> bool {
54        self.len() == K::I::zero()
55    }
56
57    fn from_slice(slice: K::Slice<'_, T>) -> Self;
58
59    /// Clamp any `R: RangeBounds<K::I>` into the range of valid indices for this array.
60    fn to_range<R: RangeBounds<K::I>>(&self, r: R) -> Range<K::I> {
61        let n = self.len();
62        let start = match r.start_bound().cloned() {
63            Bound::Included(i) => i,
64            Bound::Excluded(i) => i + K::I::one(),
65            Bound::Unbounded => K::I::zero(),
66        };
67
68        // NOTE: Range is *exclusive* of end, so for Included(i) we need to increment!.
69        let end = match r.end_bound().cloned() {
70            Bound::Included(i) => i + K::I::one(),
71            Bound::Excluded(i) => i,
72            Bound::Unbounded => n,
73        };
74
75        Range { start, end }
76    }
77
78    /// Concatenate two arrays
79    fn concatenate(&self, other: &Self) -> Self;
80
81    /// Concatenate a slice of arrays into one array.
82    fn concatenate_many(arrays: &[&Self]) -> Self;
83
84    /// `fill(x, n)` returns the array length n containing repeated element x.
85    fn fill(x: T, n: K::I) -> Self;
86
87    /// Retrieve a single element by its index.
88    fn get(&self, i: K::I) -> T;
89
90    /// Get a contiguous range of the underlying array as a slice.
91    fn get_range<R: RangeBounds<K::I>>(&self, rb: R) -> K::Slice<'_, T>;
92
93    /// Write to a contiguous range of data in an array
94    fn set_range<R: RangeBounds<K::I>>(&mut self, rb: R, v: &K::Type<T>); // mutate self
95
96    /// Gather elements of this array according to the indices.
97    /// <https://en.wikipedia.org/wiki/Gather/scatter_(vector_addressing)#Gather>
98    /// ```text
99    /// x = self.gather(idx) // equivalent to x[i] = self[idx[i]]
100    /// ```
101    fn gather(&self, idx: K::Slice<'_, K::I>) -> Self;
102
103    /// Scatter elements of `self` into a new array at indices `idx`.
104    /// ```text
105    /// x = self.scatter(idx) // equivalent to x[idx[i]] = self[i]
106    /// ```
107    ///
108    /// # Panics
109    ///
110    /// If there is any `i ≥ n` in `idx`
111    fn scatter(&self, idx: K::Slice<'_, K::I>, n: K::I) -> Self;
112
113    fn scatter_assign(&mut self, ixs: &K::Index, values: Self);
114
115    /// Numpy `self[ixs] = arg`
116    fn scatter_assign_constant(&mut self, ixs: &K::Index, arg: T);
117}
118
119pub trait OrdArray<K: ArrayKind, T>: Clone + Array<K, T> {
120    /// Produce an array of indices which sorts `self`.
121    /// That is, `self.gather(self.argsort())` is monotonic.
122    fn argsort(&self) -> K::Index;
123
124    /// Sort this array by the given keys
125    ///
126    /// ```rust
127    /// use open_hypergraphs::array::{*, vec::*};
128    /// let values = VecArray(vec![10, 20, 30, 40]);
129    /// let keys = VecArray(vec![3, 1, 0, 2]);
130    /// let expected = VecArray(vec![30, 20, 40, 10]);
131    /// let actual = values.sort_by(&keys);
132    /// assert_eq!(expected, actual);
133    /// ```
134    fn sort_by(&self, key: &Self) -> Self {
135        self.gather(key.argsort().get_range(..))
136    }
137}
138
139/// Arrays of natural numbers.
140/// This is used for computing with *indexes* and *sizes*.
141pub trait NaturalArray<K: ArrayKind>:
142    OrdArray<K, K::I> + Sized + Sub<Self, Output = Self> + Add<Self, Output = Self> + AsRef<K::Index>
143{
144    fn max(&self) -> Option<K::I>;
145
146    /// An inclusive-and-exclusive cumulative sum
147    /// For an input of size `N`, returns an array `x` of size `N+1` where `x[0] = 0` and `x[-1] = sum(x)`
148    fn cumulative_sum(&self) -> Self;
149
150    // NOTE: we can potentially remove this if IndexedCoproduct moves to using pointers instead of
151    // segment sizes.
152    #[must_use]
153    fn sum(&self) -> K::I {
154        if self.len() == K::I::zero() {
155            K::I::zero()
156        } else {
157            self.cumulative_sum().get(self.len())
158        }
159    }
160
161    /// Indices from start to stop
162    ///
163    /// ```rust
164    /// use open_hypergraphs::array::{*, vec::*};
165    /// let x0 = VecArray::arange(&0, &3);
166    /// assert_eq!(x0, VecArray(vec![0, 1, 2]));
167    ///
168    /// let x1 = VecArray::arange(&0, &0);
169    /// assert_eq!(x1, VecArray(vec![]));
170    /// ```
171    fn arange(start: &K::I, stop: &K::I) -> Self;
172
173    /// Repeat each element of the given slice.
174    /// self and x must be equal lengths.
175    fn repeat(&self, x: K::Slice<'_, K::I>) -> Self;
176
177    /// Compute the arrays (self%denominator, self/denominator)
178    ///
179    /// # Panics
180    ///
181    /// When d == 0.
182    fn quot_rem(&self, d: K::I) -> (Self, Self);
183
184    /// Compute `self * c + x`, where `c` is a constant (scalar) and `x` is an array.
185    ///
186    /// # Panics
187    ///
188    /// When self.len() != x.len().
189    fn mul_constant_add(&self, c: K::I, x: &Self) -> Self;
190
191    /// Compute the connected components of a graph with `n` nodes.
192    /// Edges are stored as a pair of arrays of nodes `(sources, targets)`
193    /// meaning that for each `i` there is an edge `sources[i] → targets[i]`.
194    ///
195    /// Since `n` is the number of nodes in the graph, the values in `sources` and `targets` must
196    /// be less than `n`.
197    ///
198    /// # Returns
199    ///
200    /// Returns a pair `(cc_ix, k)`, where `cc_ix[i]` is the connected component for the `i`th
201    /// node, and `k` is the total number of components.
202    ///
203    /// # Panics
204    ///
205    /// * Inequal lengths: `sources.len() != targets.len()`
206    /// * Indexes are out of bounds: `sources[i] >= n` or `targets[i] >= n`.
207    fn connected_components(sources: &Self, targets: &Self, n: K::I) -> (Self, K::I);
208
209    /// Segmented sum of input.
210    /// For example, for `self = [1 2 0]`,
211    /// `self.segmented_sum([1 | 2 3]) = [1 5 0]`.
212    ///
213    /// # Panics
214    ///
215    /// When `self.sum() != x.len()`
216    fn segmented_sum(&self, x: &Self) -> Self {
217        let segment_sizes = self;
218
219        // cumulative sum of segments, including total size (last element)
220        // [ 2 4 ] → [ 0 2 6 ]
221        let ptr = segment_sizes.cumulative_sum();
222
223        // Cumulative sum of values
224        // [ 1 2 3 4 5 6 ] → [ 0 1 3 6 10 15 21 ]
225        let sum = x.cumulative_sum();
226
227        // total number of pointers (num segments + 1)
228        let n = ptr.len();
229
230        // NOTE: we do two allocations for both `gather`s here, but avoiding this
231        // would require complicating the API quite a bit!
232        sum.gather(ptr.get_range(K::I::one()..)) - sum.gather(ptr.get_range(..n - K::I::one()))
233    }
234
235    /// Given an array of *sizes* compute the concatenation of `arange` arrays of each size.
236    ///
237    /// ```rust
238    /// use open_hypergraphs::array::{*, vec::*};
239    /// let x = VecArray::<usize>(vec![2, 3, 0, 5]);
240    /// let y = VecArray::<usize>(vec![0, 1, 0, 1, 2, 0, 1, 2, 3, 4]);
241    /// assert_eq!(x.segmented_arange(), y)
242    /// ```
243    ///
244    /// Default implementation has time complexity:
245    ///
246    /// - Sequential: `O(n)`
247    /// - PRAM CREW: `O(log n)`
248    fn segmented_arange(&self) -> Self {
249        // How this works, by example:
250        //   input   = [ 2 3 0 5 ]
251        //   output  = [ 0 1 | 0 1 2 | | 0 1 2 3 4 ]
252        // compute ptrs
253        //   p       = [ 0 2 5 5 ]
254        //   r       = [ 0 0 | 2 2 2 | | 5 5 5 5 5 ]
255        //   i       = [ 0 1   2 3 4     5 6 7 8 9 ]
256        //   i - r   = [ 0 1 | 0 1 2 | | 0 1 2 3 4 ]
257        // Note: r is computed as repeat(p, n)
258        //
259        // Complexity
260        //   O(n)     sequential
261        //   O(log n) PRAM CREW (cumsum is log n)
262        let p = self.cumulative_sum();
263        let last_idx = p.len() - K::I::one();
264        let sum = p.get(last_idx.clone());
265
266        let r = self.repeat(p.get_range(..last_idx));
267        let i = Self::arange(&K::I::zero(), &sum);
268        i - r
269    }
270
271    /// Count occurrences of each value in the range [0, size)
272    fn bincount(&self, size: K::I) -> K::Index;
273
274    /// Compute index of unique values and their counts
275    fn sparse_bincount(&self) -> (K::Index, K::Index);
276
277    /// Return indices of elements which are zero
278    fn zero(&self) -> K::Index;
279
280    /// Compute `self[ixs] -= rhs`
281    fn scatter_sub_assign(&mut self, ixs: &K::Index, rhs: &K::Index);
282}