open_hypergraphs/array/traits.rs
1//! The operations which an array type must support to implement open hypergraphs
2use core::fmt::Debug;
3use core::ops::{Add, Sub};
4use core::ops::{Bound, Range, RangeBounds};
5
6use num_traits::{One, Zero};
7
8/// Array *kinds*.
9/// For example, [`super::vec::VecKind`] is the set of types [`Vec<T>`] for all `T`.
10pub trait ArrayKind: Sized {
11 /// The type of arrays containing elements type T
12 type Type<T>;
13
14 /// The type of index *elements*. For [`super::vec::VecKind`], this is [`usize`].
15 type I: Clone
16 + PartialEq
17 + Ord
18 + Debug
19 + One
20 + Zero
21 + Add<Output = Self::I>
22 + Sub<Output = Self::I>
23 // NOTE: this last constraint that an index can add with rhs an Index array is a hack that
24 // lets us implement `tensor` for finite functions without unnecessary cloning.
25 + for<'a> Add<&'a Self::Index, Output = Self::Index>;
26
27 /// Arrays of indices (isomorphic to `Type<I>`) must implement NaturalArray
28 type Index: NaturalArray<Self>
29 + Into<Self::Type<Self::I>>
30 + From<Self::Type<Self::I>>
31 + AsRef<Self::Type<Self::I>>
32 + AsMut<Self::Type<Self::I>>;
33
34 /// a `Slice` is a read-only view into another array's data.
35 /// For `VecKind` this is `&[T]`.
36 type Slice<'a, T: 'a>; // part of an array
37}
38
39/// Arrays of elements T for some [`ArrayKind`] `K`.
40///
41/// # Panics
42///
43/// Any operation using an index out of range for the given array will panic.
44pub trait Array<K: ArrayKind, T>: Clone + PartialEq<Self> {
45 /// The empty array
46 fn empty() -> Self;
47
48 /// Length of an array
49 fn len(&self) -> K::I;
50
51 /// Test if an array is empty
52 fn is_empty(&self) -> bool {
53 self.len() == K::I::zero()
54 }
55
56 fn from_slice<'a>(slice: K::Slice<'a, T>) -> Self;
57
58 /// Clamp any `R: RangeBounds<K::I>` into the range of valid indices for this array.
59 fn to_range<R: RangeBounds<K::I>>(&self, r: R) -> Range<K::I> {
60 let n = self.len();
61 let start = match r.start_bound().cloned() {
62 Bound::Included(i) => i,
63 Bound::Excluded(i) => i + K::I::one(),
64 Bound::Unbounded => K::I::zero(),
65 };
66
67 // NOTE: Range is *exclusive* of end, so for Included(i) we need to increment!.
68 let end = match r.end_bound().cloned() {
69 Bound::Included(i) => i + K::I::one(),
70 Bound::Excluded(i) => i,
71 Bound::Unbounded => n,
72 };
73
74 Range { start, end }
75 }
76
77 /// Concatenate two arrays
78 fn concatenate(&self, other: &Self) -> Self;
79
80 /// `fill(x, n)` returns the array length n containing repeated element x.
81 fn fill(x: T, n: K::I) -> Self;
82
83 /// Retrieve a single element by its index.
84 fn get(&self, i: K::I) -> T;
85
86 /// Get a contiguous range of the underlying array as a slice.
87 fn get_range<R: RangeBounds<K::I>>(&self, rb: R) -> K::Slice<'_, T>;
88
89 /// Write to a contiguous range of data in an array
90 fn set_range<R: RangeBounds<K::I>>(&mut self, rb: R, v: &K::Type<T>); // mutate self
91
92 /// Gather elements of this array according to the indices.
93 /// <https://en.wikipedia.org/wiki/Gather/scatter_(vector_addressing)#Gather>
94 /// ```text
95 /// x = self.gather(idx) // equivalent to x[i] = self[idx[i]]
96 /// ```
97 fn gather(&self, idx: K::Slice<'_, K::I>) -> Self;
98
99 /// Scatter elements of `self` into a new array at indices `idx`.
100 /// ```text
101 /// x = self.scatter(idx) // equivalent to x[idx[i]] = self[i]
102 /// ```
103 ///
104 /// # Panics
105 ///
106 /// If there is any `i ≥ n` in `idx`
107 fn scatter(&self, idx: K::Slice<'_, K::I>, n: K::I) -> Self;
108
109 /// Numpy `self[ixs] = arg`
110 fn scatter_assign_constant(&mut self, _ixs: &K::Index, _arg: T);
111}
112
113pub trait OrdArray<K: ArrayKind, T>: Clone + PartialEq<Self> + Array<K, T> {
114 /// Produce an array of indices which sorts `self`.
115 /// That is, `self.gather(self.argsort())` is monotonic.
116 fn argsort(&self) -> K::Index;
117
118 /// Sort this array by the given keys
119 ///
120 /// ```rust
121 /// use open_hypergraphs::array::{*, vec::*};
122 /// let values = VecArray(vec![10, 20, 30, 40]);
123 /// let keys = VecArray(vec![3, 1, 0, 2]);
124 /// let expected = VecArray(vec![30, 20, 40, 10]);
125 /// let actual = values.sort_by(&keys);
126 /// assert_eq!(expected, actual);
127 /// ```
128 fn sort_by(&self, key: &Self) -> Self {
129 self.gather(key.argsort().get_range(..))
130 }
131}
132
133/// Arrays of natural numbers.
134/// This is used for computing with *indexes* and *sizes*.
135pub trait NaturalArray<K: ArrayKind>:
136 OrdArray<K, K::I> + Sized + Sub<Self, Output = Self> + Add<Self, Output = Self> + AsRef<K::Index>
137{
138 fn max(&self) -> Option<K::I>;
139
140 /// An inclusive-and-exclusive cumulative sum
141 /// For an input of size `N`, returns an array `x` of size `N+1` where `x[0] = 0` and `x[-1] = sum(x)`
142 fn cumulative_sum(&self) -> Self;
143
144 // NOTE: we can potentially remove this if IndexedCoproduct moves to using pointers instead of
145 // segment sizes.
146 #[must_use]
147 fn sum(&self) -> K::I {
148 if self.len() == K::I::zero() {
149 K::I::zero()
150 } else {
151 self.cumulative_sum().get(self.len())
152 }
153 }
154
155 /// Indices from start to stop
156 ///
157 /// ```rust
158 /// use open_hypergraphs::array::{*, vec::*};
159 /// let x0 = VecArray::arange(&0, &3);
160 /// assert_eq!(x0, VecArray(vec![0, 1, 2]));
161 ///
162 /// let x1 = VecArray::arange(&0, &0);
163 /// assert_eq!(x1, VecArray(vec![]));
164 /// ```
165 fn arange(start: &K::I, stop: &K::I) -> Self;
166
167 /// Repeat each element of the given slice.
168 /// self and x must be equal lengths.
169 fn repeat(&self, x: K::Slice<'_, K::I>) -> Self;
170
171 /// Compute the arrays (self%denominator, self/denominator)
172 ///
173 /// # Panics
174 ///
175 /// When d == 0.
176 fn quot_rem(&self, d: K::I) -> (Self, Self);
177
178 /// Compute `self * c + x`, where `c` is a constant (scalar) and `x` is an array.
179 ///
180 /// # Panics
181 ///
182 /// When self.len() != x.len().
183 fn mul_constant_add(&self, c: K::I, x: &Self) -> Self;
184
185 /// Compute the connected components of a graph with `n` nodes.
186 /// Edges are stored as a pair of arrays of nodes `(sources, targets)`
187 /// meaning that for each `i` there is an edge `sources[i] → targets[i]`.
188 ///
189 /// Since `n` is the number of nodes in the graph, the values in `sources` and `targets` must
190 /// be less than `n`.
191 ///
192 /// # Returns
193 ///
194 /// Returns a pair `(cc_ix, k)`, where `cc_ix[i]` is the connected component for the `i`th
195 /// node, and `k` is the total number of components.
196 ///
197 /// # Panics
198 ///
199 /// * Inequal lengths: `sources.len() != targets.len()`
200 /// * Indexes are out of bounds: `sources[i] >= n` or `targets[i] >= n`.
201 fn connected_components(sources: &Self, targets: &Self, n: K::I) -> (Self, K::I);
202
203 /// Segmented sum of input.
204 /// For example, for `self = [1 2 0]`,
205 /// `self.segmented_sum([1 | 2 3]) = [1 5 0]`.
206 ///
207 /// # Panics
208 ///
209 /// When `self.sum() != x.len()`
210 fn segmented_sum(&self, x: &Self) -> Self {
211 let segment_sizes = self;
212
213 // cumulative sum of segments, including total size (last element)
214 // [ 2 4 ] → [ 0 2 6 ]
215 let ptr = segment_sizes.cumulative_sum();
216
217 // Cumulative sum of values
218 // [ 1 2 3 4 5 6 ] → [ 0 1 3 6 10 15 21 ]
219 let sum = x.cumulative_sum();
220
221 // total number of pointers (num segments + 1)
222 let n = ptr.len();
223
224 // NOTE: we do two allocations for both `gather`s here, but avoiding this
225 // would require complicating the API quite a bit!
226 sum.gather(ptr.get_range(K::I::one()..)) - sum.gather(ptr.get_range(..n - K::I::one()))
227 }
228
229 /// Given an array of *sizes* compute the concatenation of `arange` arrays of each size.
230 ///
231 /// ```rust
232 /// use open_hypergraphs::array::{*, vec::*};
233 /// let x = VecArray::<usize>(vec![2, 3, 0, 5]);
234 /// let y = VecArray::<usize>(vec![0, 1, 0, 1, 2, 0, 1, 2, 3, 4]);
235 /// assert_eq!(x.segmented_arange(), y)
236 /// ```
237 ///
238 /// Default implementation has time complexity:
239 ///
240 /// - Sequential: `O(n)`
241 /// - PRAM CREW: `O(log n)`
242 fn segmented_arange(&self) -> Self {
243 // How this works, by example:
244 // input = [ 2 3 0 5 ]
245 // output = [ 0 1 | 0 1 2 | | 0 1 2 3 4 ]
246 // compute ptrs
247 // p = [ 0 2 5 5 ]
248 // r = [ 0 0 | 2 2 2 | | 5 5 5 5 5 ]
249 // i = [ 0 1 2 3 4 5 6 7 8 9 ]
250 // i - r = [ 0 1 | 0 1 2 | | 0 1 2 3 4 ]
251 // Note: r is computed as repeat(p, n)
252 //
253 // Complexity
254 // O(n) sequential
255 // O(log n) PRAM CREW (cumsum is log n)
256 let p = self.cumulative_sum();
257 let last_idx = p.len() - K::I::one();
258 let sum = p.get(last_idx.clone());
259
260 let r = self.repeat(p.get_range(..last_idx));
261 let i = Self::arange(&K::I::zero(), &sum);
262 i - r
263 }
264
265 /// Count occurrences of each value in the range [0, size)
266 fn bincount(&self, size: K::I) -> K::Index;
267
268 /// Compute index of unique values and their counts
269 fn sparse_bincount(&self) -> (K::Index, K::Index);
270
271 /// Return indices of elements which are zero
272 fn zero(&self) -> K::Index;
273
274 /// Compute `self[ixs] -= rhs`
275 fn scatter_sub_assign(&mut self, ixs: &K::Index, rhs: &K::Index);
276}