1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
//! Sorting and search operations trait.
use crate::error::{Error, Result};
use crate::runtime::Runtime;
use crate::tensor::Tensor;
/// Trait for sorting and search operations.
///
/// Covers sorting tensors, top-k selection, searching sorted sequences,
/// and unique-value computation. All methods default to
/// `Error::NotImplemented` so backends can opt in incrementally.
pub trait SortingOps<R: Runtime> {
    /// Sort a tensor along one dimension, returning only the sorted values.
    ///
    /// When both values and indices are needed, prefer `sort_with_indices`.
    ///
    /// # Arguments
    ///
    /// * `a` - Input tensor
    /// * `dim` - Dimension to sort along (negative indexing supported)
    /// * `descending` - Sort in descending order when true
    ///
    /// # Returns
    ///
    /// A tensor with the input's shape whose values are sorted along `dim`.
    ///
    /// # Backend Limitations
    ///
    /// - **WebGPU**: Max 512 elements per sort dimension (bitonic sort uses shared memory)
    ///
    /// # Example
    ///
    /// ```
    /// # use numr::prelude::*;
    /// # use numr::ops::SortingOps;
    /// # let device = CpuDevice::new();
    /// # let client = CpuRuntime::default_client(&device);
    /// let a = Tensor::<CpuRuntime>::from_slice(&[3.0f32, 1.0, 4.0, 1.0, 5.0], &[5], &device);
    /// let sorted = client.sort(&a, 0, false)?; // [1.0, 1.0, 3.0, 4.0, 5.0]
    /// # Ok::<(), numr::error::Error>(())
    /// ```
    fn sort(&self, a: &Tensor<R>, dim: isize, descending: bool) -> Result<Tensor<R>> {
        // Default stub: parameters intentionally unused.
        let _ = a;
        let _ = dim;
        let _ = descending;
        Err(Error::NotImplemented {
            feature: "SortingOps::sort",
        })
    }

    /// Sort a tensor along one dimension, returning sorted values and their
    /// original positions.
    ///
    /// # Arguments
    ///
    /// * `a` - Input tensor
    /// * `dim` - Dimension to sort along (negative indexing supported)
    /// * `descending` - Sort in descending order when true
    ///
    /// # Returns
    ///
    /// A `(sorted_values, indices)` pair:
    /// - `sorted_values`: same shape and dtype as the input, sorted along `dim`
    /// - `indices`: I64 tensor of the same shape, holding each value's original index
    ///
    /// # Backend Limitations
    ///
    /// - **WebGPU**: Max 512 elements per sort dimension (bitonic sort uses shared memory)
    ///
    /// # Example
    ///
    /// ```
    /// # use numr::prelude::*;
    /// # use numr::ops::SortingOps;
    /// # let device = CpuDevice::new();
    /// # let client = CpuRuntime::default_client(&device);
    /// let a = Tensor::<CpuRuntime>::from_slice(&[3.0f32, 1.0, 4.0], &[3], &device);
    /// let (values, indices) = client.sort_with_indices(&a, 0, false)?;
    /// // values = [1.0, 3.0, 4.0]
    /// // indices = [1, 0, 2]
    /// # Ok::<(), numr::error::Error>(())
    /// ```
    fn sort_with_indices(
        &self,
        a: &Tensor<R>,
        dim: isize,
        descending: bool,
    ) -> Result<(Tensor<R>, Tensor<R>)> {
        // Default stub: parameters intentionally unused.
        let _ = a;
        let _ = dim;
        let _ = descending;
        Err(Error::NotImplemented {
            feature: "SortingOps::sort_with_indices",
        })
    }

    /// Compute the indices that would sort the tensor along a dimension.
    ///
    /// Semantically equal to `sort_with_indices(...).1`, but can be cheaper
    /// when only the indices are required.
    ///
    /// # Arguments
    ///
    /// * `a` - Input tensor
    /// * `dim` - Dimension to compute sort indices along (negative indexing supported)
    /// * `descending` - Produce indices for descending order when true
    ///
    /// # Returns
    ///
    /// An I64 tensor with the input's shape, whose entries are the indices
    /// that would sort the input along `dim`.
    ///
    /// # Backend Limitations
    ///
    /// - **WebGPU**: Max 512 elements per sort dimension (bitonic sort uses shared memory)
    ///
    /// # Example
    ///
    /// ```
    /// # use numr::prelude::*;
    /// # use numr::ops::SortingOps;
    /// # let device = CpuDevice::new();
    /// # let client = CpuRuntime::default_client(&device);
    /// let a = Tensor::<CpuRuntime>::from_slice(&[3.0f32, 1.0, 4.0], &[3], &device);
    /// let indices = client.argsort(&a, 0, false)?; // [1, 0, 2]
    /// // a[indices] would give [1.0, 3.0, 4.0]
    /// # Ok::<(), numr::error::Error>(())
    /// ```
    fn argsort(&self, a: &Tensor<R>, dim: isize, descending: bool) -> Result<Tensor<R>> {
        // Default stub: parameters intentionally unused.
        let _ = a;
        let _ = dim;
        let _ = descending;
        Err(Error::NotImplemented {
            feature: "SortingOps::argsort",
        })
    }

    /// Select the K largest (or smallest) values along a dimension, together
    /// with their original indices.
    ///
    /// # Arguments
    ///
    /// * `a` - Input tensor
    /// * `k` - How many elements to select
    /// * `dim` - Dimension to select along (negative indexing supported)
    /// * `largest` - Select the largest elements when true, smallest when false
    /// * `sorted` - Return results in sorted order when true; otherwise keep
    ///   the relative input order
    ///
    /// # Returns
    ///
    /// A `(values, indices)` pair:
    /// - `values`: shape `` `[..., k, ...]` `` (size along `dim` replaced by `k`), input dtype
    /// - `indices`: I64 tensor with the same shape as `values`, holding original indices
    ///
    /// # Errors
    ///
    /// Returns `InvalidArgument` if k > dim_size.
    ///
    /// # Backend Limitations
    ///
    /// - **WebGPU**: Max 512 elements per sort dimension (bitonic sort uses shared memory)
    ///
    /// # Example
    ///
    /// ```
    /// # use numr::prelude::*;
    /// # use numr::ops::SortingOps;
    /// # let device = CpuDevice::new();
    /// # let client = CpuRuntime::default_client(&device);
    /// let a = Tensor::<CpuRuntime>::from_slice(&[3.0f32, 1.0, 4.0, 1.0, 5.0], &[5], &device);
    /// let (values, indices) = client.topk(&a, 2, 0, true, true)?;
    /// // values = [5.0, 4.0] (largest 2, sorted)
    /// // indices = [4, 2]
    /// # Ok::<(), numr::error::Error>(())
    /// ```
    fn topk(
        &self,
        a: &Tensor<R>,
        k: usize,
        dim: isize,
        largest: bool,
        sorted: bool,
    ) -> Result<(Tensor<R>, Tensor<R>)> {
        // Default stub: parameters intentionally unused.
        let _ = a;
        let _ = k;
        let _ = dim;
        let _ = largest;
        let _ = sorted;
        Err(Error::NotImplemented {
            feature: "SortingOps::topk",
        })
    }

    /// Compute the unique elements of a tensor.
    ///
    /// The input is flattened; unique elements are returned in sorted order.
    ///
    /// # Arguments
    ///
    /// * `a` - Input tensor
    /// * `sorted` - If true (default), return unique elements in sorted order
    ///
    /// # Returns
    ///
    /// A 1D tensor of unique elements; its length depends on the input's contents.
    ///
    /// # Example
    ///
    /// ```
    /// # use numr::prelude::*;
    /// # use numr::ops::SortingOps;
    /// # let device = CpuDevice::new();
    /// # let client = CpuRuntime::default_client(&device);
    /// let a = Tensor::<CpuRuntime>::from_slice(&[1.0f32, 2.0, 2.0, 3.0, 1.0], &[5], &device);
    /// let unique = client.unique(&a, true)?; // [1.0, 2.0, 3.0]
    /// # Ok::<(), numr::error::Error>(())
    /// ```
    fn unique(&self, a: &Tensor<R>, sorted: bool) -> Result<Tensor<R>> {
        // Default stub: parameters intentionally unused.
        let _ = a;
        let _ = sorted;
        Err(Error::NotImplemented {
            feature: "SortingOps::unique",
        })
    }

    /// Compute unique elements along with inverse indices and counts.
    ///
    /// Extends `unique` with two extra outputs:
    /// - inverse indices mapping each input element to its position in the unique output
    /// - occurrence counts for each unique element
    ///
    /// # Arguments
    ///
    /// * `a` - Input tensor
    ///
    /// # Returns
    ///
    /// A `(unique, inverse_indices, counts)` triple:
    /// - `unique`: 1D tensor of unique values, sorted
    /// - `inverse_indices`: I64 tensor shaped like the flattened input
    /// - `counts`: I64 tensor as long as `unique`, holding occurrence counts
    ///
    /// # Example
    ///
    /// ```
    /// # use numr::prelude::*;
    /// # use numr::ops::SortingOps;
    /// # let device = CpuDevice::new();
    /// # let client = CpuRuntime::default_client(&device);
    /// let a = Tensor::<CpuRuntime>::from_slice(&[1.0f32, 2.0, 2.0, 3.0, 1.0], &[5], &device);
    /// let (unique, inverse, counts) = client.unique_with_counts(&a)?;
    /// // unique = [1.0, 2.0, 3.0]
    /// // inverse = [0, 1, 1, 2, 0] (maps each input to index in unique)
    /// // counts = [2, 2, 1] (1.0 appears 2x, 2.0 appears 2x, 3.0 appears 1x)
    /// # Ok::<(), numr::error::Error>(())
    /// ```
    fn unique_with_counts(&self, a: &Tensor<R>) -> Result<(Tensor<R>, Tensor<R>, Tensor<R>)> {
        // Default stub: parameter intentionally unused.
        let _ = a;
        Err(Error::NotImplemented {
            feature: "SortingOps::unique_with_counts",
        })
    }

    /// Find the indices of all non-zero elements.
    ///
    /// Each row of the result is the multi-dimensional index of one non-zero element.
    ///
    /// # Arguments
    ///
    /// * `a` - Input tensor
    ///
    /// # Returns
    ///
    /// An I64 tensor of shape `` `[N, ndim]` ``, where N is the number of
    /// non-zero elements and ndim is the input's dimensionality.
    ///
    /// # Example
    ///
    /// ```
    /// # use numr::prelude::*;
    /// # use numr::ops::SortingOps;
    /// # let device = CpuDevice::new();
    /// # let client = CpuRuntime::default_client(&device);
    /// let a = Tensor::<CpuRuntime>::from_slice(&[0.0f32, 1.0, 0.0, 2.0], &[2, 2], &device);
    /// let indices = client.nonzero(&a)?;
    /// // indices = [[0, 1], [1, 1]] (positions of 1.0 and 2.0)
    /// # Ok::<(), numr::error::Error>(())
    /// ```
    fn nonzero(&self, a: &Tensor<R>) -> Result<Tensor<R>> {
        // Default stub: parameter intentionally unused.
        let _ = a;
        Err(Error::NotImplemented {
            feature: "SortingOps::nonzero",
        })
    }

    /// Locate insertion points for values within a sorted sequence.
    ///
    /// For every element of `values`, returns the index in `sorted_sequence`
    /// at which inserting that element would keep the sequence sorted.
    ///
    /// # Arguments
    ///
    /// * `sorted_sequence` - 1D sorted tensor
    /// * `values` - Values to search for (any shape)
    /// * `right` - Use the rightmost insertion point when true, leftmost otherwise
    ///
    /// # Returns
    ///
    /// An I64 tensor shaped like `values`, containing insertion indices.
    ///
    /// # Example
    ///
    /// ```
    /// # use numr::prelude::*;
    /// # use numr::ops::SortingOps;
    /// # let device = CpuDevice::new();
    /// # let client = CpuRuntime::default_client(&device);
    /// let sorted = Tensor::<CpuRuntime>::from_slice(&[1.0f32, 3.0, 5.0, 7.0], &[4], &device);
    /// let values = Tensor::<CpuRuntime>::from_slice(&[2.0f32, 4.0, 6.0], &[3], &device);
    /// let indices = client.searchsorted(&sorted, &values, false)?;
    /// // indices = [1, 2, 3] (insert positions to maintain order)
    /// # Ok::<(), numr::error::Error>(())
    /// ```
    fn searchsorted(
        &self,
        sorted_sequence: &Tensor<R>,
        values: &Tensor<R>,
        right: bool,
    ) -> Result<Tensor<R>> {
        // Default stub: parameters intentionally unused.
        let _ = sorted_sequence;
        let _ = values;
        let _ = right;
        Err(Error::NotImplemented {
            feature: "SortingOps::searchsorted",
        })
    }
}