1use std::cmp::Ordering;
9use std::collections::HashMap;
10
11use runmat_accelerate_api::{
12 GpuTensorHandle, HostTensorOwned, UniqueOccurrence, UniqueOptions, UniqueOrder, UniqueResult,
13};
14use runmat_builtins::{CharArray, ComplexTensor, StringArray, Tensor, Value};
15use runmat_macros::runtime_builtin;
16
17use crate::builtins::common::gpu_helpers;
18use crate::builtins::common::random_args::complex_tensor_into_value;
19use crate::builtins::common::spec::{
20 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
21 ProviderHook, ReductionNaN, ResidencyPolicy, ScalarType, ShapeRequirements,
22};
23use crate::builtins::common::tensor;
24#[cfg(feature = "doc_export")]
25use crate::register_builtin_doc_text;
26use crate::{register_builtin_fusion_spec, register_builtin_gpu_spec};
27
28#[cfg(feature = "doc_export")]
29pub const DOC_MD: &str = r#"---
30title: "unique"
31category: "array/sorting_sets"
32keywords: ["unique", "set", "distinct", "stable", "rows", "indices", "gpu", "string", "char"]
33summary: "Return the unique elements or rows of arrays with optional index outputs."
34references:
35 - https://www.mathworks.com/help/matlab/ref/unique.html
36gpu_support:
37 elementwise: false
38 reduction: false
39 precisions: ["f32", "f64"]
40 broadcasting: "none"
41 notes: "Uses the provider `unique` hook when available; default providers download to host memory and reuse the CPU implementation."
42fusion:
43 elementwise: false
44 reduction: false
45 max_inputs: 1
46 constants: "inline"
47requires_feature: null
48tested:
49 unit: "builtins::array::sorting_sets::unique::tests"
50 integration: "builtins::array::sorting_sets::unique::tests::unique_gpu_roundtrip"
51---
52
53# What does the `unique` function do in MATLAB / RunMat?
54`unique` removes duplicates from its input while providing optional index outputs
55that map between the original data and the returned distinct values (or rows).
56By default results are sorted, but you can request stable order, operate on rows,
57and choose whether the first or last occurrence is retained.
58
59## How does the `unique` function behave in MATLAB / RunMat?
60- `unique(A)` flattens numeric, logical, character, string, or complex arrays column-major into a vector of unique values sorted ascending.
61- `[C, IA] = unique(A)` also returns the indices of the selected occurrences (`IA`) so that `C = A(IA)`.
62- `[C, IA, IC] = unique(A)` provides `IC`, the mapping from each element of `A` to the corresponding index in `C`.
63- `unique(A, 'stable')` preserves the first appearance order rather than sorting.
64- `unique(A, 'rows')` treats each row as an observation and returns a matrix (or char/string array) whose rows are unique.
65- `unique(A, 'last')` or `'first'` controls which occurrence contributes to `IA` (defaults to `'first'`).
66- Combinations such as `unique(A, 'rows', 'stable', 'last')` follow MATLAB's precedence rules; mutually exclusive flags (e.g. `'sorted'` with `'stable'`) are rejected.
67- Empty inputs return empty outputs with consistent dimensions.
68- Legacy switches such as `'legacy'` or `'R2012a'` are not supported; RunMat always follows the modern MATLAB semantics.
69
70## `unique` Function GPU Execution Behaviour
71`unique` is registered as a residency sink. When the provider exposes the custom
72`unique` hook, the runtime can execute the operation entirely on the device and keep
73results resident. If the active provider does not implement that hook, RunMat gathers
74the data to host memory, performs the CPU implementation, and returns host-resident
75outputs so subsequent MATLAB code observes the same values and ordering.
76
77## Examples of using the `unique` function in MATLAB / RunMat
78
79### Getting Sorted Unique Values
80```matlab
81A = [3 1 3 2];
82C = unique(A);
83```
84Expected output:
85```matlab
86C =
87 1
88 2
89 3
90```
91
92### Preserving Input Order with `'stable'`
93```matlab
94A = [4 2 4 1 2];
95C = unique(A, 'stable');
96```
97Expected output:
98```matlab
99C =
100 4
101 2
102 1
103```
104
105### Returning Indices for Reconstruction
106```matlab
107A = [7 5 7 3];
108[C, IA, IC] = unique(A);
109reconstructed = C(IC);
110```
111Expected output:
112```matlab
113C =
114 3
115 5
116 7
117IA =
118 4
119 2
120 1
121IC =
122 3
123 2
124 3
125 1
126reconstructed =
127 7
128 5
129 7
130 3
131```
132
133### Finding Unique Rows in a Matrix
134```matlab
135A = [1 3; 1 3; 2 4; 1 2];
136[C, IA, IC] = unique(A, 'rows');
137```
138Expected output:
139```matlab
140C =
141 1 2
142 1 3
143 2 4
144IA =
145 4
146 1
147 3
148IC =
149 2
150 2
151 3
152 1
153```
154
155### Selecting Last Occurrences
156```matlab
157A = [9 8 9 7 8];
158[C, IA] = unique(A, 'last');
159```
160Expected output:
161```matlab
162C =
163 7
164 8
165 9
166IA =
167 4
168 5
169 3
170```
171
172### Working with Empty Arrays
173```matlab
174A = zeros(0, 3);
175[C, IA, IC] = unique(A, 'rows');
176```
177Expected output:
178```matlab
179C =
180IA =
181IC =
182```
183(all outputs are empty with compatible dimensions.)
184
185### Using `unique` on GPU Arrays
186```matlab
187G = gpuArray([5 3 5 1]);
188[C, IA, IC] = unique(G, 'stable');
189```
When the active provider does not implement the `unique` hook, RunMat gathers `G` to the host, runs the CPU implementation, and returns:
191```matlab
192C =
193 5
194 3
195 1
196IA =
197 1
198 2
199 4
200IC =
201 1
202 2
203 1
204 3
205```
206
207### Unique Characters in a Char Array
208```matlab
209chars = ['m','z'; 'm','a'];
210[C, IA] = unique(chars);
211```
212Expected output (`C` is a column vector of characters):
213```matlab
214C =
215 a
216 m
217 z
218IA =
219 4
220 1
221 3
222```
223
224### Unique Strings with Row Deduplication
225```matlab
226S = ["alpha" "beta"; "alpha" "beta"; "gamma" "beta"];
227[C, IA, IC] = unique(S, 'rows', 'stable');
228```
229Expected output:
230```matlab
231C =
232 2x2 string array
233 "alpha" "beta"
234 "gamma" "beta"
235IA =
236 1
237 3
238IC =
239 1
240 1
241 2
242```
243
244## FAQ
245
246### Which ordering does `unique` use by default?
247Results are sorted in ascending order unless you pass `'stable'`, which preserves the first occurrence order.
248
249### How are the index outputs defined?
250`IA` indexes into the original data (or rows). `IC` is a column vector mapping each element (or row) of the input to the position of the corresponding unique value in `C`.
251
252### What do `'first'` and `'last'` control?
253They determine whether `IA` references the first or last occurrence of each distinct value/row. They do not affect `C` or `IC`.
254
255### Can I combine `'rows'` with `'stable'` or `'last'`?
256Yes. All permutations of `'rows'`, `'stable'`/`'sorted'`, and `'first'`/`'last'` are accepted. The runtime enforces MATLAB's validation rules.
257
258### Does `unique` support complex numbers or characters?
259Yes. Complex values use magnitude ordering for the sorted output, and character or string arrays produce results in their native container types (char arrays and string arrays respectively).
260
261### How does `unique` treat NaN values?
262All NaN values are considered equal. Sorted outputs place NaNs at the end; stable outputs keep their original relative order.
263
264### Are GPU arrays supported?
265Yes. When a provider lacks a native kernel, RunMat gathers GPU arrays to host memory and executes the host implementation, guaranteeing MATLAB-compatible output.
266
267### Does `unique` preserve array shape?
268Scalar outputs remain scalars. Otherwise, values are returned as column vectors (for element mode) or matrices with the same number of columns as the input (for `'rows'`).
269
270### What happens with empty inputs?
271Empty inputs (including empty matrices) return empty outputs with matching dimensions, and index outputs are empty column vectors.
272
273### Is `unique` stable?
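The default sorted output is produced with a stable sort, so entries that compare equal keep their relative order. This is separate from the `'stable'` option, which skips sorting and returns values in order of first appearance.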
275
276## See Also
277[sort](./sort), [sortrows](./sortrows), [argsort](./argsort)
278
279## Source & Feedback
280- Source code: [`crates/runmat-runtime/src/builtins/array/sorting_sets/unique.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/array/sorting_sets/unique.rs)
281- Found a bug? [Open an issue](https://github.com/runmat-org/runmat/issues/new/choose) with details and a minimal repro.
282"#;
283
/// GPU capability descriptor for the `unique` builtin.
///
/// Declares a custom `unique` operation backed by a custom `unique` provider
/// hook, with no broadcasting and `F32`/`F64` listed as supported precisions.
/// The descriptor is handed to `register_builtin_gpu_spec!` directly below.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "unique",
    op_kind: GpuOpKind::Custom("unique"),
    supported_precisions: &[ScalarType::F32, ScalarType::F64],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[ProviderHook::Custom("unique")],
    constant_strategy: ConstantStrategy::InlineLiteral,
    // NOTE(review): presumably tells the planner to download inputs before the
    // builtin runs; confirm against `ResidencyPolicy`'s definition.
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: true,
    notes: "Providers may implement the `unique` hook; default providers download tensors and reuse the CPU implementation.",
};

register_builtin_gpu_spec!(GPU_SPEC);
300
/// Fusion descriptor for the `unique` builtin.
///
/// Neither an elementwise nor a reduction fusion kernel is supplied (both are
/// `None`), and any input shape is accepted. The descriptor is handed to
/// `register_builtin_fusion_spec!` directly below.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "unique",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: true,
    notes: "`unique` terminates fusion chains and materialises results on the host; upstream tensors are gathered when necessary.",
};

register_builtin_fusion_spec!(FUSION_SPEC);
312
// The Markdown help text is only registered when the `doc_export` feature is
// enabled; `DOC_MD` itself is gated behind the same feature.
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("unique", DOC_MD);
315
316#[runtime_builtin(
317 name = "unique",
318 category = "array/sorting_sets",
319 summary = "Return the unique elements or rows of arrays with optional index outputs.",
320 keywords = "unique,set,distinct,stable,rows,indices,gpu",
321 accel = "array_construct",
322 sink = true
323)]
324fn unique_builtin(value: Value, rest: Vec<Value>) -> Result<Value, String> {
325 evaluate(value, &rest).map(|eval| eval.into_values_value())
326}
327
328pub fn evaluate(value: Value, rest: &[Value]) -> Result<UniqueEvaluation, String> {
330 let opts = parse_options(rest)?;
331 match value {
332 Value::GpuTensor(handle) => unique_gpu(handle, &opts),
333 other => unique_host(other, &opts),
334 }
335}
336
337fn parse_options(rest: &[Value]) -> Result<UniqueOptions, String> {
338 let mut opts = UniqueOptions {
339 rows: false,
340 order: UniqueOrder::Sorted,
341 occurrence: UniqueOccurrence::First,
342 };
343 let mut seen_order: Option<UniqueOrder> = None;
344 let mut seen_occurrence: Option<UniqueOccurrence> = None;
345
346 for arg in rest {
347 let text = tensor::value_to_string(arg)
348 .ok_or_else(|| "unique: expected string option arguments".to_string())?;
349 let lowered = text.trim().to_ascii_lowercase();
350 match lowered.as_str() {
351 "sorted" => {
352 if let Some(prev) = seen_order {
353 if prev != UniqueOrder::Sorted {
354 return Err("unique: cannot combine 'sorted' with 'stable'".to_string());
355 }
356 }
357 seen_order = Some(UniqueOrder::Sorted);
358 opts.order = UniqueOrder::Sorted;
359 }
360 "stable" => {
361 if let Some(prev) = seen_order {
362 if prev != UniqueOrder::Stable {
363 return Err("unique: cannot combine 'sorted' with 'stable'".to_string());
364 }
365 }
366 seen_order = Some(UniqueOrder::Stable);
367 opts.order = UniqueOrder::Stable;
368 }
369 "rows" => {
370 opts.rows = true;
371 }
372 "first" => {
373 if let Some(prev) = seen_occurrence {
374 if prev != UniqueOccurrence::First {
375 return Err("unique: cannot combine 'first' with 'last'".to_string());
376 }
377 }
378 seen_occurrence = Some(UniqueOccurrence::First);
379 opts.occurrence = UniqueOccurrence::First;
380 }
381 "last" => {
382 if let Some(prev) = seen_occurrence {
383 if prev != UniqueOccurrence::Last {
384 return Err("unique: cannot combine 'first' with 'last'".to_string());
385 }
386 }
387 seen_occurrence = Some(UniqueOccurrence::Last);
388 opts.occurrence = UniqueOccurrence::Last;
389 }
390 "legacy" | "r2012a" => {
391 return Err("unique: the 'legacy' behaviour is not supported".to_string());
392 }
393 other => {
394 return Err(format!("unique: unrecognised option '{other}'"));
395 }
396 }
397 }
398
399 Ok(opts)
400}
401
402fn unique_gpu(handle: GpuTensorHandle, opts: &UniqueOptions) -> Result<UniqueEvaluation, String> {
403 if let Some(provider) = runmat_accelerate_api::provider() {
404 if let Ok(result) = provider.unique(&handle, opts) {
405 return UniqueEvaluation::from_unique_result(result);
406 }
407 }
408 let tensor = gpu_helpers::gather_tensor(&handle)?;
409 unique_numeric_from_tensor(tensor, opts)
410}
411
412fn unique_host(value: Value, opts: &UniqueOptions) -> Result<UniqueEvaluation, String> {
413 match value {
414 Value::Tensor(tensor) => unique_numeric_from_tensor(tensor, opts),
415 Value::Num(n) => {
416 let tensor = Tensor::new(vec![n], vec![1, 1]).map_err(|e| format!("unique: {e}"))?;
417 unique_numeric_from_tensor(tensor, opts)
418 }
419 Value::Int(i) => {
420 let tensor = Tensor::new(vec![i.to_f64()], vec![1, 1])
421 .map_err(|e| format!("unique: {e}"))?;
422 unique_numeric_from_tensor(tensor, opts)
423 }
424 Value::Bool(b) => {
425 let tensor = Tensor::new(vec![if b { 1.0 } else { 0.0 }], vec![1, 1])
426 .map_err(|e| format!("unique: {e}"))?;
427 unique_numeric_from_tensor(tensor, opts)
428 }
429 Value::LogicalArray(logical) => {
430 let tensor = tensor::logical_to_tensor(&logical)?;
431 unique_numeric_from_tensor(tensor, opts)
432 }
433 Value::ComplexTensor(tensor) => unique_complex_from_tensor(tensor, opts),
434 Value::Complex(re, im) => {
435 let tensor = ComplexTensor::new(vec![(re, im)], vec![1, 1])
436 .map_err(|e| format!("unique: {e}"))?;
437 unique_complex_from_tensor(tensor, opts)
438 }
439 Value::CharArray(array) => unique_char_array(array, opts),
440 Value::StringArray(array) => unique_string_array(array, opts),
441 Value::String(s) => {
442 let array = StringArray::new(vec![s], vec![1, 1]).map_err(|e| format!("unique: {e}"))?;
443 unique_string_array(array, opts)
444 }
445 other => Err(format!(
446 "unique: unsupported input type {:?}; expected numeric, logical, char, string, or complex values",
447 other
448 )),
449 }
450}
451
452pub fn unique_numeric_from_tensor(
453 tensor: Tensor,
454 opts: &UniqueOptions,
455) -> Result<UniqueEvaluation, String> {
456 if opts.rows {
457 unique_numeric_rows(tensor, opts)
458 } else {
459 unique_numeric_elements(tensor, opts)
460 }
461}
462
463fn unique_numeric_elements(
464 tensor: Tensor,
465 opts: &UniqueOptions,
466) -> Result<UniqueEvaluation, String> {
467 let len = tensor.data.len();
468 if len == 0 {
469 let values = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
470 let ia = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
471 let ic = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
472 return Ok(UniqueEvaluation::new(
473 tensor::tensor_into_value(values),
474 ia,
475 ic,
476 ));
477 }
478
479 let mut entries = Vec::<NumericElementEntry>::new();
480 let mut map: HashMap<u64, usize> = HashMap::new();
481 let mut element_entry_index = Vec::with_capacity(len);
482
483 for (idx, &value) in tensor.data.iter().enumerate() {
484 let key = canonicalize_f64(value);
485 match map.get(&key) {
486 Some(&entry_idx) => {
487 entries[entry_idx].last = idx;
488 element_entry_index.push(entry_idx);
489 }
490 None => {
491 let entry_idx = entries.len();
492 entries.push(NumericElementEntry {
493 value,
494 first: idx,
495 last: idx,
496 });
497 map.insert(key, entry_idx);
498 element_entry_index.push(entry_idx);
499 }
500 }
501 }
502
503 let mut order: Vec<usize> = (0..entries.len()).collect();
504 if opts.order == UniqueOrder::Sorted {
505 order.sort_by(|&a, &b| compare_f64(entries[a].value, entries[b].value));
506 }
507
508 let mut entry_to_position = vec![0usize; entries.len()];
509 for (pos, &entry_idx) in order.iter().enumerate() {
510 entry_to_position[entry_idx] = pos;
511 }
512
513 let mut values = Vec::with_capacity(order.len());
514 let mut ia = Vec::with_capacity(order.len());
515 for &entry_idx in &order {
516 let entry = &entries[entry_idx];
517 values.push(entry.value);
518 let occurrence = match opts.occurrence {
519 UniqueOccurrence::First => entry.first,
520 UniqueOccurrence::Last => entry.last,
521 };
522 ia.push((occurrence + 1) as f64);
523 }
524
525 let mut ic = Vec::with_capacity(len);
526 for entry_idx in element_entry_index {
527 let pos = entry_to_position[entry_idx];
528 ic.push((pos + 1) as f64);
529 }
530
531 let value_tensor =
532 Tensor::new(values, vec![order.len(), 1]).map_err(|e| format!("unique: {e}"))?;
533 let ia_tensor = Tensor::new(ia, vec![order.len(), 1]).map_err(|e| format!("unique: {e}"))?;
534 let ic_tensor = Tensor::new(ic, vec![len, 1]).map_err(|e| format!("unique: {e}"))?;
535
536 Ok(UniqueEvaluation::new(
537 tensor::tensor_into_value(value_tensor),
538 ia_tensor,
539 ic_tensor,
540 ))
541}
542
543fn unique_numeric_rows(tensor: Tensor, opts: &UniqueOptions) -> Result<UniqueEvaluation, String> {
544 if tensor.shape.len() != 2 {
545 return Err("unique: 'rows' option requires a 2-D matrix input".to_string());
546 }
547 let rows = tensor.shape[0];
548 let cols = tensor.shape[1];
549
550 if rows == 0 || cols == 0 {
551 let values = Tensor::new(Vec::new(), vec![0, cols]).map_err(|e| format!("unique: {e}"))?;
552 let ia = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
553 let ic = Tensor::new(Vec::new(), vec![rows, 1]).map_err(|e| format!("unique: {e}"))?;
554 return Ok(UniqueEvaluation::new(
555 tensor::tensor_into_value(values),
556 ia,
557 ic,
558 ));
559 }
560
561 let mut entries = Vec::<NumericRowEntry>::new();
562 let mut map: HashMap<NumericRowKey, usize> = HashMap::new();
563 let mut row_entry_index = Vec::with_capacity(rows);
564
565 for r in 0..rows {
566 let mut row_values = Vec::with_capacity(cols);
567 for c in 0..cols {
568 let idx = r + c * rows;
569 row_values.push(tensor.data[idx]);
570 }
571 let key = NumericRowKey::from_slice(&row_values);
572 match map.get(&key) {
573 Some(&entry_idx) => {
574 entries[entry_idx].last = r;
575 row_entry_index.push(entry_idx);
576 }
577 None => {
578 let entry_idx = entries.len();
579 entries.push(NumericRowEntry {
580 row_data: row_values.clone(),
581 first: r,
582 last: r,
583 });
584 map.insert(key, entry_idx);
585 row_entry_index.push(entry_idx);
586 }
587 }
588 }
589
590 let mut order: Vec<usize> = (0..entries.len()).collect();
591 if opts.order == UniqueOrder::Sorted {
592 order.sort_by(|&a, &b| compare_numeric_rows(&entries[a].row_data, &entries[b].row_data));
593 }
594
595 let mut entry_to_position = vec![0usize; entries.len()];
596 for (pos, &entry_idx) in order.iter().enumerate() {
597 entry_to_position[entry_idx] = pos;
598 }
599
600 let unique_rows_count = order.len();
601 let mut values = vec![0.0f64; unique_rows_count * cols];
602 for (row_pos, &entry_idx) in order.iter().enumerate() {
603 let row = &entries[entry_idx].row_data;
604 for (col, value) in row.iter().enumerate().take(cols) {
605 let dest = row_pos + col * unique_rows_count;
606 values[dest] = *value;
607 }
608 }
609
610 let mut ia = Vec::with_capacity(unique_rows_count);
611 for &entry_idx in &order {
612 let entry = &entries[entry_idx];
613 let occurrence = match opts.occurrence {
614 UniqueOccurrence::First => entry.first,
615 UniqueOccurrence::Last => entry.last,
616 };
617 ia.push((occurrence + 1) as f64);
618 }
619
620 let mut ic = Vec::with_capacity(rows);
621 for entry_idx in row_entry_index {
622 let pos = entry_to_position[entry_idx];
623 ic.push((pos + 1) as f64);
624 }
625
626 let value_tensor =
627 Tensor::new(values, vec![unique_rows_count, cols]).map_err(|e| format!("unique: {e}"))?;
628 let ia_tensor =
629 Tensor::new(ia, vec![unique_rows_count, 1]).map_err(|e| format!("unique: {e}"))?;
630 let ic_tensor = Tensor::new(ic, vec![rows, 1]).map_err(|e| format!("unique: {e}"))?;
631
632 Ok(UniqueEvaluation::new(
633 tensor::tensor_into_value(value_tensor),
634 ia_tensor,
635 ic_tensor,
636 ))
637}
638
639fn unique_complex_from_tensor(
640 tensor: ComplexTensor,
641 opts: &UniqueOptions,
642) -> Result<UniqueEvaluation, String> {
643 if opts.rows {
644 unique_complex_rows(tensor, opts)
645 } else {
646 unique_complex_elements(tensor, opts)
647 }
648}
649
650fn unique_complex_elements(
651 tensor: ComplexTensor,
652 opts: &UniqueOptions,
653) -> Result<UniqueEvaluation, String> {
654 let len = tensor.data.len();
655 if len == 0 {
656 let values =
657 ComplexTensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
658 let ia = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
659 let ic = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
660 return Ok(UniqueEvaluation::new(
661 complex_tensor_into_value(values),
662 ia,
663 ic,
664 ));
665 }
666
667 let mut entries = Vec::<ComplexElementEntry>::new();
668 let mut map: HashMap<ComplexKey, usize> = HashMap::new();
669 let mut element_entry_index = Vec::with_capacity(len);
670
671 for (idx, &value) in tensor.data.iter().enumerate() {
672 let key = ComplexKey::new(value);
673 match map.get(&key) {
674 Some(&entry_idx) => {
675 entries[entry_idx].last = idx;
676 element_entry_index.push(entry_idx);
677 }
678 None => {
679 let entry_idx = entries.len();
680 entries.push(ComplexElementEntry {
681 value,
682 first: idx,
683 last: idx,
684 });
685 map.insert(key, entry_idx);
686 element_entry_index.push(entry_idx);
687 }
688 }
689 }
690
691 let mut order: Vec<usize> = (0..entries.len()).collect();
692 if opts.order == UniqueOrder::Sorted {
693 order.sort_by(|&a, &b| compare_complex(entries[a].value, entries[b].value));
694 }
695
696 let mut entry_to_position = vec![0usize; entries.len()];
697 for (pos, &entry_idx) in order.iter().enumerate() {
698 entry_to_position[entry_idx] = pos;
699 }
700
701 let mut values = Vec::with_capacity(order.len());
702 let mut ia = Vec::with_capacity(order.len());
703 for &entry_idx in &order {
704 let entry = &entries[entry_idx];
705 values.push(entry.value);
706 let occurrence = match opts.occurrence {
707 UniqueOccurrence::First => entry.first,
708 UniqueOccurrence::Last => entry.last,
709 };
710 ia.push((occurrence + 1) as f64);
711 }
712
713 let mut ic = Vec::with_capacity(len);
714 for entry_idx in element_entry_index {
715 let pos = entry_to_position[entry_idx];
716 ic.push((pos + 1) as f64);
717 }
718
719 let value_tensor =
720 ComplexTensor::new(values, vec![order.len(), 1]).map_err(|e| format!("unique: {e}"))?;
721 let ia_tensor = Tensor::new(ia, vec![order.len(), 1]).map_err(|e| format!("unique: {e}"))?;
722 let ic_tensor = Tensor::new(ic, vec![len, 1]).map_err(|e| format!("unique: {e}"))?;
723
724 Ok(UniqueEvaluation::new(
725 complex_tensor_into_value(value_tensor),
726 ia_tensor,
727 ic_tensor,
728 ))
729}
730
731fn unique_complex_rows(
732 tensor: ComplexTensor,
733 opts: &UniqueOptions,
734) -> Result<UniqueEvaluation, String> {
735 if tensor.shape.len() != 2 {
736 return Err("unique: 'rows' option requires a 2-D matrix input".to_string());
737 }
738 let rows = tensor.shape[0];
739 let cols = tensor.shape[1];
740
741 if rows == 0 || cols == 0 {
742 let values =
743 ComplexTensor::new(Vec::new(), vec![rows, cols]).map_err(|e| format!("unique: {e}"))?;
744 let ia = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
745 let ic = Tensor::new(Vec::new(), vec![rows, 1]).map_err(|e| format!("unique: {e}"))?;
746 return Ok(UniqueEvaluation::new(
747 complex_tensor_into_value(values),
748 ia,
749 ic,
750 ));
751 }
752
753 let mut entries = Vec::<ComplexRowEntry>::new();
754 let mut map: HashMap<Vec<ComplexKey>, usize> = HashMap::new();
755 let mut row_entry_index = Vec::with_capacity(rows);
756
757 for r in 0..rows {
758 let mut row_values = Vec::with_capacity(cols);
759 let mut key_row = Vec::with_capacity(cols);
760 for c in 0..cols {
761 let idx = r + c * rows;
762 let value = tensor.data[idx];
763 row_values.push(value);
764 key_row.push(ComplexKey::new(value));
765 }
766 match map.get(&key_row) {
767 Some(&entry_idx) => {
768 entries[entry_idx].last = r;
769 row_entry_index.push(entry_idx);
770 }
771 None => {
772 let entry_idx = entries.len();
773 entries.push(ComplexRowEntry {
774 row_data: row_values.clone(),
775 first: r,
776 last: r,
777 });
778 map.insert(key_row, entry_idx);
779 row_entry_index.push(entry_idx);
780 }
781 }
782 }
783
784 let mut order: Vec<usize> = (0..entries.len()).collect();
785 if opts.order == UniqueOrder::Sorted {
786 order.sort_by(|&a, &b| compare_complex_rows(&entries[a].row_data, &entries[b].row_data));
787 }
788
789 let mut entry_to_position = vec![0usize; entries.len()];
790 for (pos, &entry_idx) in order.iter().enumerate() {
791 entry_to_position[entry_idx] = pos;
792 }
793
794 let unique_rows_count = order.len();
795 let mut values = vec![(0.0, 0.0); unique_rows_count * cols];
796 for (row_pos, &entry_idx) in order.iter().enumerate() {
797 let row = &entries[entry_idx].row_data;
798 for (col, value) in row.iter().enumerate().take(cols) {
799 let dest = row_pos + col * unique_rows_count;
800 values[dest] = *value;
801 }
802 }
803
804 let mut ia = Vec::with_capacity(unique_rows_count);
805 for &entry_idx in &order {
806 let entry = &entries[entry_idx];
807 let occurrence = match opts.occurrence {
808 UniqueOccurrence::First => entry.first,
809 UniqueOccurrence::Last => entry.last,
810 };
811 ia.push((occurrence + 1) as f64);
812 }
813
814 let mut ic = Vec::with_capacity(rows);
815 for entry_idx in row_entry_index {
816 let pos = entry_to_position[entry_idx];
817 ic.push((pos + 1) as f64);
818 }
819
820 let value_tensor = ComplexTensor::new(values, vec![unique_rows_count, cols])
821 .map_err(|e| format!("unique: {e}"))?;
822 let ia_tensor =
823 Tensor::new(ia, vec![unique_rows_count, 1]).map_err(|e| format!("unique: {e}"))?;
824 let ic_tensor = Tensor::new(ic, vec![rows, 1]).map_err(|e| format!("unique: {e}"))?;
825
826 Ok(UniqueEvaluation::new(
827 complex_tensor_into_value(value_tensor),
828 ia_tensor,
829 ic_tensor,
830 ))
831}
832
833fn unique_char_array(array: CharArray, opts: &UniqueOptions) -> Result<UniqueEvaluation, String> {
834 if opts.rows {
835 unique_char_rows(array, opts)
836 } else {
837 unique_char_elements(array, opts)
838 }
839}
840
841fn unique_char_elements(
842 array: CharArray,
843 opts: &UniqueOptions,
844) -> Result<UniqueEvaluation, String> {
845 let rows = array.rows;
846 let cols = array.cols;
847 let total = rows * cols;
848 if total == 0 {
849 let values = CharArray::new(Vec::new(), 0, 0).map_err(|e| format!("unique: {e}"))?;
850 let ia = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
851 let ic = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
852 return Ok(UniqueEvaluation::new(Value::CharArray(values), ia, ic));
853 }
854
855 let mut entries = Vec::<CharElementEntry>::new();
856 let mut map: HashMap<u32, usize> = HashMap::new();
857 let mut element_entry_index = Vec::with_capacity(total);
858
859 for col in 0..cols {
860 for row in 0..rows {
861 let linear_idx = row + col * rows;
862 let data_idx = row * cols + col;
863 let ch = array.data[data_idx];
864 let key = ch as u32;
865 match map.get(&key) {
866 Some(&entry_idx) => {
867 entries[entry_idx].last = linear_idx;
868 element_entry_index.push(entry_idx);
869 }
870 None => {
871 let entry_idx = entries.len();
872 entries.push(CharElementEntry {
873 ch,
874 first: linear_idx,
875 last: linear_idx,
876 });
877 map.insert(key, entry_idx);
878 element_entry_index.push(entry_idx);
879 }
880 }
881 }
882 }
883
884 let mut order: Vec<usize> = (0..entries.len()).collect();
885 if opts.order == UniqueOrder::Sorted {
886 order.sort_by(|&a, &b| entries[a].ch.cmp(&entries[b].ch));
887 }
888
889 let mut entry_to_position = vec![0usize; entries.len()];
890 for (pos, &entry_idx) in order.iter().enumerate() {
891 entry_to_position[entry_idx] = pos;
892 }
893
894 let mut values = Vec::with_capacity(order.len());
895 let mut ia = Vec::with_capacity(order.len());
896 for &entry_idx in &order {
897 let entry = &entries[entry_idx];
898 values.push(entry.ch);
899 let occurrence = match opts.occurrence {
900 UniqueOccurrence::First => entry.first,
901 UniqueOccurrence::Last => entry.last,
902 };
903 ia.push((occurrence + 1) as f64);
904 }
905
906 let mut ic = Vec::with_capacity(total);
907 for entry_idx in element_entry_index {
908 let pos = entry_to_position[entry_idx];
909 ic.push((pos + 1) as f64);
910 }
911
912 let value_array = CharArray::new(values, order.len(), 1).map_err(|e| format!("unique: {e}"))?;
913 let ia_tensor = Tensor::new(ia, vec![order.len(), 1]).map_err(|e| format!("unique: {e}"))?;
914 let ic_tensor = Tensor::new(ic, vec![total, 1]).map_err(|e| format!("unique: {e}"))?;
915
916 Ok(UniqueEvaluation::new(
917 Value::CharArray(value_array),
918 ia_tensor,
919 ic_tensor,
920 ))
921}
922
923fn unique_char_rows(array: CharArray, opts: &UniqueOptions) -> Result<UniqueEvaluation, String> {
924 let rows = array.rows;
925 let cols = array.cols;
926 if rows == 0 {
927 let values = CharArray::new(Vec::new(), 0, cols).map_err(|e| format!("unique: {e}"))?;
928 let ia = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
929 let ic = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
930 return Ok(UniqueEvaluation::new(Value::CharArray(values), ia, ic));
931 }
932
933 let mut entries = Vec::<CharRowEntry>::new();
934 let mut map: HashMap<RowCharKey, usize> = HashMap::new();
935 let mut row_entry_index = Vec::with_capacity(rows);
936
937 for r in 0..rows {
938 let start = r * cols;
939 let end = start + cols;
940 let slice = &array.data[start..end];
941 let key = RowCharKey::from_slice(slice);
942 match map.get(&key) {
943 Some(&entry_idx) => {
944 entries[entry_idx].last = r;
945 row_entry_index.push(entry_idx);
946 }
947 None => {
948 let entry_idx = entries.len();
949 entries.push(CharRowEntry {
950 row_data: slice.to_vec(),
951 first: r,
952 last: r,
953 });
954 map.insert(key, entry_idx);
955 row_entry_index.push(entry_idx);
956 }
957 }
958 }
959
960 let mut order: Vec<usize> = (0..entries.len()).collect();
961 if opts.order == UniqueOrder::Sorted {
962 order.sort_by(|&a, &b| compare_char_rows(&entries[a].row_data, &entries[b].row_data));
963 }
964
965 let mut entry_to_position = vec![0usize; entries.len()];
966 for (pos, &entry_idx) in order.iter().enumerate() {
967 entry_to_position[entry_idx] = pos;
968 }
969
970 let unique_rows_count = order.len();
971 let mut values = vec!['\0'; unique_rows_count * cols];
972 for (row_pos, &entry_idx) in order.iter().enumerate() {
973 let row = &entries[entry_idx].row_data;
974 for col in 0..cols {
975 let dest = row_pos * cols + col;
976 if col < row.len() {
977 values[dest] = row[col];
978 }
979 }
980 }
981
982 let mut ia = Vec::with_capacity(unique_rows_count);
983 for &entry_idx in &order {
984 let entry = &entries[entry_idx];
985 let occurrence = match opts.occurrence {
986 UniqueOccurrence::First => entry.first,
987 UniqueOccurrence::Last => entry.last,
988 };
989 ia.push((occurrence + 1) as f64);
990 }
991
992 let mut ic = Vec::with_capacity(rows);
993 for entry_idx in row_entry_index {
994 let pos = entry_to_position[entry_idx];
995 ic.push((pos + 1) as f64);
996 }
997
998 let value_array =
999 CharArray::new(values, unique_rows_count, cols).map_err(|e| format!("unique: {e}"))?;
1000 let ia_tensor =
1001 Tensor::new(ia, vec![unique_rows_count, 1]).map_err(|e| format!("unique: {e}"))?;
1002 let ic_tensor = Tensor::new(ic, vec![rows, 1]).map_err(|e| format!("unique: {e}"))?;
1003
1004 Ok(UniqueEvaluation::new(
1005 Value::CharArray(value_array),
1006 ia_tensor,
1007 ic_tensor,
1008 ))
1009}
1010
1011fn unique_string_array(
1012 array: StringArray,
1013 opts: &UniqueOptions,
1014) -> Result<UniqueEvaluation, String> {
1015 if opts.rows {
1016 unique_string_rows(array, opts)
1017 } else {
1018 unique_string_elements(array, opts)
1019 }
1020}
1021
1022fn unique_string_elements(
1023 array: StringArray,
1024 opts: &UniqueOptions,
1025) -> Result<UniqueEvaluation, String> {
1026 let len = array.data.len();
1027 if len == 0 {
1028 let values =
1029 StringArray::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
1030 let ia = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
1031 let ic = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
1032 return Ok(UniqueEvaluation::new(Value::StringArray(values), ia, ic));
1033 }
1034
1035 let mut entries = Vec::<StringElementEntry>::new();
1036 let mut map: HashMap<String, usize> = HashMap::new();
1037 let mut element_entry_index = Vec::with_capacity(len);
1038
1039 for (idx, value) in array.data.iter().enumerate() {
1040 match map.get(value) {
1041 Some(&entry_idx) => {
1042 entries[entry_idx].last = idx;
1043 element_entry_index.push(entry_idx);
1044 }
1045 None => {
1046 let entry_idx = entries.len();
1047 entries.push(StringElementEntry {
1048 value: value.clone(),
1049 first: idx,
1050 last: idx,
1051 });
1052 map.insert(value.clone(), entry_idx);
1053 element_entry_index.push(entry_idx);
1054 }
1055 }
1056 }
1057
1058 let mut order: Vec<usize> = (0..entries.len()).collect();
1059 if opts.order == UniqueOrder::Sorted {
1060 order.sort_by(|&a, &b| entries[a].value.cmp(&entries[b].value));
1061 }
1062
1063 let mut entry_to_position = vec![0usize; entries.len()];
1064 for (pos, &entry_idx) in order.iter().enumerate() {
1065 entry_to_position[entry_idx] = pos;
1066 }
1067
1068 let mut values = Vec::with_capacity(order.len());
1069 let mut ia = Vec::with_capacity(order.len());
1070 for &entry_idx in &order {
1071 let entry = &entries[entry_idx];
1072 values.push(entry.value.clone());
1073 let occurrence = match opts.occurrence {
1074 UniqueOccurrence::First => entry.first,
1075 UniqueOccurrence::Last => entry.last,
1076 };
1077 ia.push((occurrence + 1) as f64);
1078 }
1079
1080 let mut ic = Vec::with_capacity(len);
1081 for entry_idx in element_entry_index {
1082 let pos = entry_to_position[entry_idx];
1083 ic.push((pos + 1) as f64);
1084 }
1085
1086 let value_array =
1087 StringArray::new(values, vec![order.len(), 1]).map_err(|e| format!("unique: {e}"))?;
1088 let ia_tensor = Tensor::new(ia, vec![order.len(), 1]).map_err(|e| format!("unique: {e}"))?;
1089 let ic_tensor = Tensor::new(ic, vec![len, 1]).map_err(|e| format!("unique: {e}"))?;
1090
1091 Ok(UniqueEvaluation::new(
1092 Value::StringArray(value_array),
1093 ia_tensor,
1094 ic_tensor,
1095 ))
1096}
1097
1098fn unique_string_rows(
1099 array: StringArray,
1100 opts: &UniqueOptions,
1101) -> Result<UniqueEvaluation, String> {
1102 if array.shape.len() != 2 {
1103 return Err("unique: 'rows' option requires a 2-D matrix input".to_string());
1104 }
1105 let rows = array.shape[0];
1106 let cols = array.shape[1];
1107
1108 if rows == 0 {
1109 let values =
1110 StringArray::new(Vec::new(), vec![0, cols]).map_err(|e| format!("unique: {e}"))?;
1111 let ia = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
1112 let ic = Tensor::new(Vec::new(), vec![0, 1]).map_err(|e| format!("unique: {e}"))?;
1113 return Ok(UniqueEvaluation::new(Value::StringArray(values), ia, ic));
1114 }
1115
1116 let mut entries = Vec::<StringRowEntry>::new();
1117 let mut map: HashMap<RowStringKey, usize> = HashMap::new();
1118 let mut row_entry_index = Vec::with_capacity(rows);
1119
1120 for r in 0..rows {
1121 let mut row_values = Vec::with_capacity(cols);
1122 for c in 0..cols {
1123 let idx = r + c * rows;
1124 row_values.push(array.data[idx].clone());
1125 }
1126 let key = RowStringKey(row_values.clone());
1127 match map.get(&key) {
1128 Some(&entry_idx) => {
1129 entries[entry_idx].last = r;
1130 row_entry_index.push(entry_idx);
1131 }
1132 None => {
1133 let entry_idx = entries.len();
1134 entries.push(StringRowEntry {
1135 row_data: row_values.clone(),
1136 first: r,
1137 last: r,
1138 });
1139 map.insert(key, entry_idx);
1140 row_entry_index.push(entry_idx);
1141 }
1142 }
1143 }
1144
1145 let mut order: Vec<usize> = (0..entries.len()).collect();
1146 if opts.order == UniqueOrder::Sorted {
1147 order.sort_by(|&a, &b| compare_string_rows(&entries[a].row_data, &entries[b].row_data));
1148 }
1149
1150 let mut entry_to_position = vec![0usize; entries.len()];
1151 for (pos, &entry_idx) in order.iter().enumerate() {
1152 entry_to_position[entry_idx] = pos;
1153 }
1154
1155 let unique_rows_count = order.len();
1156 let mut values = vec![String::new(); unique_rows_count * cols];
1157 for (row_pos, &entry_idx) in order.iter().enumerate() {
1158 let row = &entries[entry_idx].row_data;
1159 for (col, value) in row.iter().enumerate().take(cols) {
1160 let dest = row_pos + col * unique_rows_count;
1161 values[dest] = value.clone();
1162 }
1163 }
1164
1165 let mut ia = Vec::with_capacity(unique_rows_count);
1166 for &entry_idx in &order {
1167 let entry = &entries[entry_idx];
1168 let occurrence = match opts.occurrence {
1169 UniqueOccurrence::First => entry.first,
1170 UniqueOccurrence::Last => entry.last,
1171 };
1172 ia.push((occurrence + 1) as f64);
1173 }
1174
1175 let mut ic = Vec::with_capacity(rows);
1176 for entry_idx in row_entry_index {
1177 let pos = entry_to_position[entry_idx];
1178 ic.push((pos + 1) as f64);
1179 }
1180
1181 let value_array = StringArray::new(values, vec![unique_rows_count, cols])
1182 .map_err(|e| format!("unique: {e}"))?;
1183 let ia_tensor =
1184 Tensor::new(ia, vec![unique_rows_count, 1]).map_err(|e| format!("unique: {e}"))?;
1185 let ic_tensor = Tensor::new(ic, vec![rows, 1]).map_err(|e| format!("unique: {e}"))?;
1186
1187 Ok(UniqueEvaluation::new(
1188 Value::StringArray(value_array),
1189 ia_tensor,
1190 ic_tensor,
1191 ))
1192}
1193
/// Bookkeeping record for one distinct numeric element.
///
/// NOTE(review): the code that fills this struct is outside this excerpt; the
/// `first`/`last` meanings below are presumed to mirror the string and char
/// entry types — confirm against the numeric element path.
#[derive(Debug)]
struct NumericElementEntry {
    /// The distinct value.
    value: f64,
    /// Presumably the zero-based index of the first occurrence.
    first: usize,
    /// Presumably the zero-based index of the last occurrence.
    last: usize,
}
1200
1201#[derive(Debug, Clone, PartialEq, Eq, Hash)]
1202struct NumericRowKey(Vec<u64>);
1203
1204impl NumericRowKey {
1205 fn from_slice(values: &[f64]) -> Self {
1206 NumericRowKey(values.iter().map(|&v| canonicalize_f64(v)).collect())
1207 }
1208}
1209
/// Bookkeeping record for one distinct numeric row.
///
/// NOTE(review): the code that fills this struct is outside this excerpt; the
/// `first`/`last` meanings below are presumed to mirror the string and char
/// row entry types — confirm against the numeric rows path.
#[derive(Debug, Clone)]
struct NumericRowEntry {
    /// The elements of the distinct row.
    row_data: Vec<f64>,
    /// Presumably the zero-based index of the first matching input row.
    first: usize,
    /// Presumably the zero-based index of the last matching input row.
    last: usize,
}
1216
/// Bookkeeping record for one distinct complex element.
///
/// NOTE(review): the code that fills this struct is outside this excerpt; the
/// `first`/`last` meanings below are presumed to mirror the string and char
/// entry types — confirm against the complex element path.
#[derive(Debug)]
struct ComplexElementEntry {
    /// The distinct value as a `(re, im)` pair (same component order as `ComplexKey::new`).
    value: (f64, f64),
    /// Presumably the zero-based index of the first occurrence.
    first: usize,
    /// Presumably the zero-based index of the last occurrence.
    last: usize,
}
1223
1224#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
1225struct ComplexKey {
1226 re: u64,
1227 im: u64,
1228}
1229
1230impl ComplexKey {
1231 fn new(value: (f64, f64)) -> Self {
1232 Self {
1233 re: canonicalize_f64(value.0),
1234 im: canonicalize_f64(value.1),
1235 }
1236 }
1237}
1238
/// Bookkeeping record for one distinct complex row.
///
/// NOTE(review): the code that fills this struct is outside this excerpt; the
/// `first`/`last` meanings below are presumed to mirror the string and char
/// row entry types — confirm against the complex rows path.
#[derive(Debug, Clone)]
struct ComplexRowEntry {
    /// The elements of the distinct row as `(re, im)` pairs.
    row_data: Vec<(f64, f64)>,
    /// Presumably the zero-based index of the first matching input row.
    first: usize,
    /// Presumably the zero-based index of the last matching input row.
    last: usize,
}
1245
/// Bookkeeping record for one distinct character.
///
/// NOTE(review): the code that fills this struct is outside this excerpt; the
/// `first`/`last` meanings below are presumed to mirror the string entry
/// types — confirm against the char element path.
#[derive(Debug)]
struct CharElementEntry {
    /// The distinct character.
    ch: char,
    /// Presumably the zero-based index of the first occurrence.
    first: usize,
    /// Presumably the zero-based index of the last occurrence.
    last: usize,
}
1252
/// Hashable identity of a character row: the Unicode scalar value of each
/// character, in order.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct RowCharKey(Vec<u32>);

impl RowCharKey {
    /// Builds the key for `values` by converting every character to its `u32`
    /// scalar value.
    fn from_slice(values: &[char]) -> Self {
        let codes: Vec<u32> = values.iter().copied().map(u32::from).collect();
        Self(codes)
    }
}
1261
/// Bookkeeping record for one distinct row of a character matrix.
#[derive(Debug, Clone)]
struct CharRowEntry {
    /// The characters of the distinct row.
    row_data: Vec<char>,
    /// Zero-based index of the first input row equal to this one.
    first: usize,
    /// Zero-based index of the last input row equal to this one.
    last: usize,
}
1268
/// Bookkeeping record for one distinct string element.
#[derive(Debug, Clone)]
struct StringElementEntry {
    /// The distinct string.
    value: String,
    /// Zero-based index (in `array.data` order) of the first occurrence.
    first: usize,
    /// Zero-based index (in `array.data` order) of the last occurrence.
    last: usize,
}
1275
/// Hashable identity of a string row, used as the `HashMap` key when
/// de-duplicating rows of a string array.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct RowStringKey(Vec<String>);
1278
/// Bookkeeping record for one distinct row of a string array.
#[derive(Debug, Clone)]
struct StringRowEntry {
    /// The strings of the distinct row, one per column.
    row_data: Vec<String>,
    /// Zero-based index of the first input row equal to this one.
    first: usize,
    /// Zero-based index of the last input row equal to this one.
    last: usize,
}
1285
/// Maps an `f64` to a bit pattern suitable for hashing and equality.
///
/// Every NaN collapses to one fixed pattern and both signed zeros collapse to
/// `0`; any other value keeps its raw IEEE-754 bits.
fn canonicalize_f64(value: f64) -> u64 {
    const CANONICAL_NAN_BITS: u64 = 0x7ff8_0000_0000_0000;
    match value {
        v if v.is_nan() => CANONICAL_NAN_BITS,
        // `-0.0 == 0.0`, so this arm folds both zeros together.
        v if v == 0.0 => 0,
        v => v.to_bits(),
    }
}
1295
/// Total ordering for `f64` in which NaN sorts after every other value and
/// two NaNs compare equal.
fn compare_f64(a: f64, b: f64) -> Ordering {
    match (a.is_nan(), b.is_nan()) {
        (true, true) => Ordering::Equal,
        (true, false) => Ordering::Greater,
        (false, true) => Ordering::Less,
        // Neither side is NaN, so `partial_cmp` always yields an ordering.
        (false, false) => a.partial_cmp(&b).unwrap_or(Ordering::Equal),
    }
}
1309
1310fn compare_numeric_rows(a: &[f64], b: &[f64]) -> Ordering {
1311 for (lhs, rhs) in a.iter().zip(b.iter()) {
1312 let ord = compare_f64(*lhs, *rhs);
1313 if ord != Ordering::Equal {
1314 return ord;
1315 }
1316 }
1317 Ordering::Equal
1318}
1319
/// Reports whether either component of a `(re, im)` pair is NaN.
fn complex_is_nan(value: (f64, f64)) -> bool {
    let (re, im) = value;
    re.is_nan() || im.is_nan()
}
1323
1324fn compare_complex(a: (f64, f64), b: (f64, f64)) -> Ordering {
1325 match (complex_is_nan(a), complex_is_nan(b)) {
1326 (true, true) => Ordering::Equal,
1327 (true, false) => Ordering::Greater,
1328 (false, true) => Ordering::Less,
1329 (false, false) => {
1330 let mag_a = a.0.hypot(a.1);
1331 let mag_b = b.0.hypot(b.1);
1332 let mag_cmp = compare_f64(mag_a, mag_b);
1333 if mag_cmp != Ordering::Equal {
1334 return mag_cmp;
1335 }
1336 let re_cmp = compare_f64(a.0, b.0);
1337 if re_cmp != Ordering::Equal {
1338 return re_cmp;
1339 }
1340 compare_f64(a.1, b.1)
1341 }
1342 }
1343}
1344
1345fn compare_complex_rows(a: &[(f64, f64)], b: &[(f64, f64)]) -> Ordering {
1346 for (lhs, rhs) in a.iter().zip(b.iter()) {
1347 let ord = compare_complex(*lhs, *rhs);
1348 if ord != Ordering::Equal {
1349 return ord;
1350 }
1351 }
1352 Ordering::Equal
1353}
1354
/// Lexicographic comparison of two character rows by `char` ordering.
///
/// Only the overlapping prefix of the two slices is examined; if no paired
/// characters differ the rows compare equal.
fn compare_char_rows(a: &[char], b: &[char]) -> Ordering {
    a.iter()
        .zip(b)
        .map(|(lhs, rhs)| lhs.cmp(rhs))
        .find(|ord| ord.is_ne())
        .unwrap_or(Ordering::Equal)
}
1364
/// Lexicographic comparison of two string rows by `String` ordering.
///
/// Only the overlapping prefix of the two slices is examined; if no paired
/// strings differ the rows compare equal.
fn compare_string_rows(a: &[String], b: &[String]) -> Ordering {
    a.iter()
        .zip(b)
        .map(|(lhs, rhs)| lhs.cmp(rhs))
        .find(|ord| ord.is_ne())
        .unwrap_or(Ordering::Equal)
}
1374
/// Result of evaluating `unique`: the distinct values plus both index vectors.
#[derive(Debug)]
pub struct UniqueEvaluation {
    /// The distinct values (or rows), already wrapped as a runtime `Value`.
    values: Value,
    /// Index vector `ia`: one entry per distinct value, holding the one-based
    /// index of the selected (first or last) occurrence in the input.
    ia: Tensor,
    /// Index vector `ic`: one entry per input element (or row), holding the
    /// one-based position of its value in `values`.
    ic: Tensor,
}
1381
1382impl UniqueEvaluation {
1383 fn new(values: Value, ia: Tensor, ic: Tensor) -> Self {
1384 Self { values, ia, ic }
1385 }
1386
1387 pub fn into_values_value(self) -> Value {
1388 self.values
1389 }
1390
1391 pub fn into_pair(self) -> (Value, Value) {
1392 let ia = tensor::tensor_into_value(self.ia);
1393 (self.values, ia)
1394 }
1395
1396 pub fn into_triple(self) -> (Value, Value, Value) {
1397 let ia = tensor::tensor_into_value(self.ia);
1398 let ic = tensor::tensor_into_value(self.ic);
1399 (self.values, ia, ic)
1400 }
1401
1402 pub fn from_unique_result(result: UniqueResult) -> Result<Self, String> {
1403 let UniqueResult { values, ia, ic } = result;
1404 let values_tensor =
1405 Tensor::new(values.data, values.shape).map_err(|e| format!("unique: {e}"))?;
1406 let ia_tensor = Tensor::new(ia.data, ia.shape).map_err(|e| format!("unique: {e}"))?;
1407 let ic_tensor = Tensor::new(ic.data, ic.shape).map_err(|e| format!("unique: {e}"))?;
1408 Ok(UniqueEvaluation::new(
1409 tensor::tensor_into_value(values_tensor),
1410 ia_tensor,
1411 ic_tensor,
1412 ))
1413 }
1414
1415 pub fn into_numeric_unique_result(self) -> Result<UniqueResult, String> {
1416 let UniqueEvaluation { values, ia, ic } = self;
1417 let values_tensor = tensor::value_into_tensor_for("unique", values)?;
1418 Ok(UniqueResult {
1419 values: HostTensorOwned {
1420 data: values_tensor.data,
1421 shape: values_tensor.shape,
1422 },
1423 ia: HostTensorOwned {
1424 data: ia.data,
1425 shape: ia.shape,
1426 },
1427 ic: HostTensorOwned {
1428 data: ic.data,
1429 shape: ic.shape,
1430 },
1431 })
1432 }
1433
1434 pub fn ia_value(&self) -> Value {
1435 tensor::tensor_into_value(self.ia.clone())
1436 }
1437
1438 pub fn ic_value(&self) -> Value {
1439 tensor::tensor_into_value(self.ic.clone())
1440 }
1441}
1442
// Unit tests for the `unique` builtin. Each test drives `evaluate` with a
// concrete input and option list and checks the returned values and, where
// relevant, the one-based `ia` / `ic` index vectors.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builtins::common::test_support;
    use runmat_builtins::{CharArray, IntValue, LogicalArray, StringArray, Tensor, Value};

    // No options: values come back sorted ascending, `ia` points at first
    // occurrences and `ic` maps each input element to its sorted position.
    #[test]
    fn unique_sorted_default() {
        let tensor = Tensor::new(vec![3.0, 1.0, 3.0, 2.0], vec![4, 1]).unwrap();
        let eval = evaluate(Value::Tensor(tensor), &[]).expect("unique");
        let (values, ia, ic) = eval.into_triple();
        match values {
            Value::Tensor(t) => {
                assert_eq!(t.data, vec![1.0, 2.0, 3.0]);
                assert_eq!(t.shape, vec![3, 1]);
            }
            Value::Num(_) => panic!("expected tensor result"),
            other => panic!("unexpected result {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert_eq!(t.data, vec![2.0, 4.0, 1.0]),
            other => panic!("unexpected IA {other:?}"),
        }
        match ic {
            Value::Tensor(t) => assert_eq!(t.data, vec![3.0, 1.0, 3.0, 2.0]),
            other => panic!("unexpected IC {other:?}"),
        }
    }

    // Repeated NaNs collapse to a single entry that sorts after the finite values.
    #[test]
    fn unique_sorted_handles_nan() {
        let tensor = Tensor::new(vec![f64::NAN, 2.0, f64::NAN, 1.0], vec![4, 1]).unwrap();
        let eval = evaluate(Value::Tensor(tensor), &[]).expect("unique");
        let (values, ..) = eval.into_triple();
        match values {
            Value::Tensor(t) => {
                assert_eq!(t.data.len(), 3);
                assert_eq!(t.data[0], 1.0);
                assert_eq!(t.data[1], 2.0);
                assert!(t.data[2].is_nan());
            }
            other => panic!("unexpected values {other:?}"),
        }
    }

    // With "stable", the NaN keeps its first-appearance position at the front.
    #[test]
    fn unique_stable_with_nan() {
        let tensor = Tensor::new(vec![f64::NAN, 2.0, f64::NAN, 1.0], vec![4, 1]).unwrap();
        let eval = evaluate(Value::Tensor(tensor), &[Value::from("stable")]).expect("unique");
        let (values, ..) = eval.into_triple();
        match values {
            Value::Tensor(t) => {
                assert!(t.data[0].is_nan());
                assert_eq!(t.data[1], 2.0);
                assert_eq!(t.data[2], 1.0);
            }
            other => panic!("unexpected values {other:?}"),
        }
    }

    // "stable" returns values in order of first appearance.
    #[test]
    fn unique_stable_preserves_order() {
        let tensor = Tensor::new(vec![4.0, 2.0, 4.0, 1.0, 2.0], vec![5, 1]).unwrap();
        let eval = evaluate(Value::Tensor(tensor), &[Value::from("stable")]).expect("unique");
        let (values, ia) = eval.into_pair();
        match values {
            Value::Tensor(t) => assert_eq!(t.data, vec![4.0, 2.0, 1.0]),
            other => panic!("unexpected values {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert_eq!(t.data, vec![1.0, 2.0, 4.0]),
            other => panic!("unexpected IA {other:?}"),
        }
    }

    // "last" makes `ia` point at the final occurrence of each value.
    #[test]
    fn unique_last_occurrence() {
        let tensor = Tensor::new(vec![9.0, 8.0, 9.0, 7.0, 8.0], vec![5, 1]).unwrap();
        let eval = evaluate(Value::Tensor(tensor), &[Value::from("last")]).expect("unique");
        let (values, ia, ic) = eval.into_triple();
        match values {
            Value::Tensor(t) => assert_eq!(t.data, vec![7.0, 8.0, 9.0]),
            other => panic!("unexpected values {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert_eq!(t.data, vec![4.0, 5.0, 3.0]),
            other => panic!("unexpected IA {other:?}"),
        }
        match ic {
            Value::Tensor(t) => assert_eq!(t.data, vec![3.0, 2.0, 3.0, 1.0, 2.0]),
            other => panic!("unexpected IC {other:?}"),
        }
    }

    // "rows" on a 4x2 matrix (column-major data): rows are (1,3), (1,3),
    // (2,4), (1,2), so three distinct rows remain after sorting.
    #[test]
    fn unique_rows_sorted_default() {
        let tensor = Tensor::new(vec![1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 2.0], vec![4, 2]).unwrap();
        let eval = evaluate(Value::Tensor(tensor), &[Value::from("rows")]).expect("unique");
        let (values, ia, ic) = eval.into_triple();
        match values {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![3, 2]);
                assert_eq!(t.data, vec![1.0, 1.0, 2.0, 2.0, 3.0, 4.0]);
            }
            other => panic!("unexpected values {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert_eq!(t.data, vec![4.0, 1.0, 3.0]),
            other => panic!("unexpected IA {other:?}"),
        }
        match ic {
            Value::Tensor(t) => assert_eq!(t.data, vec![2.0, 2.0, 3.0, 1.0]),
            other => panic!("unexpected IC {other:?}"),
        }
    }

    // "rows" + "stable" + "last" combined: first-appearance row order with
    // last-occurrence indices in `ia`.
    #[test]
    fn unique_rows_stable_last() {
        let tensor = Tensor::new(vec![1.0, 1.0, 2.0, 1.0, 1.0, 2.0], vec![3, 2]).unwrap();
        let eval = evaluate(
            Value::Tensor(tensor),
            &[
                Value::from("rows"),
                Value::from("stable"),
                Value::from("last"),
            ],
        )
        .expect("unique");
        let (values, ia, ic) = eval.into_triple();
        match values {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![2, 2]);
                assert_eq!(t.data, vec![1.0, 2.0, 1.0, 2.0]);
            }
            other => panic!("unexpected values {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert_eq!(t.data, vec![2.0, 3.0]),
            other => panic!("unexpected IA {other:?}"),
        }
        match ic {
            Value::Tensor(t) => assert_eq!(t.data, vec![1.0, 1.0, 2.0]),
            other => panic!("unexpected IC {other:?}"),
        }
    }

    // Char matrix input, element mode: the result is a sorted 3x1 char column.
    #[test]
    fn unique_char_elements_sorted() {
        let chars = CharArray::new(vec!['m', 'z', 'm', 'a'], 2, 2).unwrap();
        let eval = evaluate(Value::CharArray(chars), &[]).expect("unique");
        let (values, ia, ic) = eval.into_triple();
        match values {
            Value::CharArray(arr) => {
                assert_eq!(arr.rows, 3);
                assert_eq!(arr.cols, 1);
                assert_eq!(arr.data, vec!['a', 'm', 'z']);
            }
            other => panic!("unexpected values {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert_eq!(t.data, vec![4.0, 1.0, 3.0]),
            other => panic!("unexpected IA {other:?}"),
        }
        match ic {
            Value::Tensor(t) => assert_eq!(t.data, vec![2.0, 2.0, 3.0, 1.0]),
            other => panic!("unexpected IC {other:?}"),
        }
    }

    // Char matrix with "rows" and "last": duplicate rows collapse and `ia`
    // reports the last matching row.
    #[test]
    fn unique_char_rows_last() {
        let chars = CharArray::new(vec!['a', 'b', 'a', 'b', 'a', 'c'], 3, 2).unwrap();
        let eval = evaluate(
            Value::CharArray(chars),
            &[Value::from("rows"), Value::from("last")],
        )
        .expect("unique");
        let (values, ia, ic) = eval.into_triple();
        match values {
            Value::CharArray(arr) => {
                assert_eq!(arr.rows, 2);
                assert_eq!(arr.cols, 2);
                assert_eq!(arr.data, vec!['a', 'b', 'a', 'c']);
            }
            other => panic!("unexpected values {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert_eq!(t.data, vec![2.0, 3.0]),
            other => panic!("unexpected IA {other:?}"),
        }
        match ic {
            Value::Tensor(t) => assert_eq!(t.data, vec![1.0, 1.0, 2.0]),
            other => panic!("unexpected IC {other:?}"),
        }
    }

    // String array, element mode with "stable": first-appearance order is kept.
    #[test]
    fn unique_string_elements_stable() {
        let array = StringArray::new(
            vec!["beta".into(), "alpha".into(), "beta".into()],
            vec![3, 1],
        )
        .unwrap();
        let eval = evaluate(Value::StringArray(array), &[Value::from("stable")]).expect("unique");
        let (values, ia, ic) = eval.into_triple();
        match values {
            Value::StringArray(sa) => {
                assert_eq!(sa.data, vec!["beta", "alpha"]);
                assert_eq!(sa.shape, vec![2, 1]);
            }
            other => panic!("unexpected values {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert_eq!(t.data, vec![1.0, 2.0]),
            other => panic!("unexpected IA {other:?}"),
        }
        match ic {
            Value::Tensor(t) => assert_eq!(t.data, vec![1.0, 2.0, 1.0]),
            other => panic!("unexpected IC {other:?}"),
        }
    }

    // 3x2 string array (column-major data) with "rows" + "stable": the rows
    // are (alpha, beta), (alpha, beta), (gamma, beta).
    #[test]
    fn unique_string_rows() {
        let array = StringArray::new(
            vec![
                "alpha".into(),
                "alpha".into(),
                "gamma".into(),
                "beta".into(),
                "beta".into(),
                "beta".into(),
            ],
            vec![3, 2],
        )
        .unwrap();
        let eval = evaluate(
            Value::StringArray(array),
            &[Value::from("rows"), Value::from("stable")],
        )
        .expect("unique");
        let (values, ia, ic) = eval.into_triple();
        match values {
            Value::StringArray(sa) => {
                assert_eq!(sa.shape, vec![2, 2]);
                assert_eq!(sa.data, vec!["alpha", "gamma", "beta", "beta"]);
            }
            other => panic!("unexpected values {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert_eq!(t.data, vec![1.0, 3.0]),
            other => panic!("unexpected IA {other:?}"),
        }
        match ic {
            Value::Tensor(t) => assert_eq!(t.data, vec![1.0, 1.0, 2.0]),
            other => panic!("unexpected IC {other:?}"),
        }
    }

    // Complex input: the two equal-magnitude values come first (ordered by
    // their imaginary parts here), followed by the larger-magnitude value.
    #[test]
    fn unique_complex_sorted() {
        let tensor = ComplexTensor::new(
            vec![(1.0, 1.0), (0.0, 2.0), (1.0, -1.0), (0.0, 2.0)],
            vec![4, 1],
        )
        .unwrap();
        let eval = evaluate(Value::ComplexTensor(tensor), &[]).expect("unique");
        let (values, ..) = eval.into_triple();
        match values {
            Value::ComplexTensor(t) => {
                assert_eq!(t.data.len(), 3);
                assert_eq!(t.data[0], (1.0, -1.0));
                assert_eq!(t.data[1], (1.0, 1.0));
                assert_eq!(t.data[2], (0.0, 2.0));
            }
            other => panic!("unexpected values {other:?}"),
        }
    }

    // Logical input is accepted and the result comes back as a numeric tensor.
    #[test]
    fn unique_handles_logical_arrays() {
        let logical = LogicalArray::new(vec![1, 0, 1, 1], vec![4, 1]).unwrap();
        let eval = evaluate(Value::LogicalArray(logical), &[]).expect("unique");
        let values = eval.into_values_value();
        match values {
            Value::Tensor(t) => assert_eq!(t.data, vec![0.0, 1.0]),
            other => panic!("unexpected values {other:?}"),
        }
    }

    // Uploads a tensor through the test provider and evaluates `unique` on the
    // resulting GPU handle.
    #[test]
    fn unique_gpu_roundtrip() {
        test_support::with_test_provider(|provider| {
            let tensor = Tensor::new(vec![5.0, 3.0, 5.0, 1.0], vec![4, 1]).unwrap();
            let view = runmat_accelerate_api::HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            let handle = provider.upload(&view).expect("upload");
            let eval =
                evaluate(Value::GpuTensor(handle), &[Value::from("stable")]).expect("unique");
            let values = eval.into_values_value();
            match values {
                Value::Tensor(t) => assert_eq!(t.data, vec![5.0, 3.0, 1.0]),
                other => panic!("unexpected values {other:?}"),
            }
        });
    }

    // With the wgpu provider registered, the GPU-handle path must produce the
    // same values, `ia` and `ic` as the host path.
    #[test]
    #[cfg(feature = "wgpu")]
    fn unique_wgpu_matches_cpu() {
        let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
        );
        let tensor = Tensor::new(vec![5.0, 3.0, 5.0, 1.0, 2.0], vec![5, 1]).unwrap();
        let host_eval = evaluate(Value::Tensor(tensor.clone()), &[]).expect("host unique");
        let (host_values, host_ia, host_ic) = host_eval.into_triple();

        let provider = runmat_accelerate_api::provider().expect("provider registered");
        let view = runmat_accelerate_api::HostTensorView {
            data: &tensor.data,
            shape: &tensor.shape,
        };
        let handle = provider.upload(&view).expect("upload");
        let gpu_eval = evaluate(Value::GpuTensor(handle.clone()), &[]).expect("gpu unique");
        let (gpu_values, gpu_ia, gpu_ic) = gpu_eval.into_triple();
        // Best-effort cleanup; a failed free must not fail the comparison.
        let _ = provider.free(&handle);

        let host_values = test_support::gather(host_values).expect("gather host values");
        let host_ia = test_support::gather(host_ia).expect("gather host ia");
        let host_ic = test_support::gather(host_ic).expect("gather host ic");
        let gpu_values = test_support::gather(gpu_values).expect("gather gpu values");
        let gpu_ia = test_support::gather(gpu_ia).expect("gather gpu ia");
        let gpu_ic = test_support::gather(gpu_ic).expect("gather gpu ic");

        assert_eq!(gpu_values.shape, host_values.shape);
        assert_eq!(gpu_values.data, host_values.data);
        assert_eq!(gpu_ia.data, host_ia.data);
        assert_eq!(gpu_ic.data, host_ic.data);
    }

    // The "legacy" flag is rejected with an error that names it.
    #[test]
    fn unique_rejects_legacy_option() {
        let tensor = Tensor::new(vec![1.0, 1.0], vec![2, 1]).unwrap();
        let err = evaluate(Value::Tensor(tensor), &[Value::from("legacy")]).unwrap_err();
        assert!(err.contains("legacy"));
    }

    // "stable" and "sorted" together are an error.
    #[test]
    fn unique_conflicting_order_flags() {
        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let err = evaluate(
            Value::Tensor(tensor),
            &[Value::from("stable"), Value::from("sorted")],
        )
        .unwrap_err();
        assert!(err.contains("stable"));
    }

    // "first" and "last" together are an error.
    #[test]
    fn unique_conflicting_occurrence_flags() {
        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let err = evaluate(
            Value::Tensor(tensor),
            &[Value::from("first"), Value::from("last")],
        )
        .unwrap_err();
        assert!(err.contains("first"));
    }

    // "rows" on a tensor whose shape has three dimensions is rejected.
    #[test]
    fn unique_rows_requires_two_dimensional_input() {
        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1, 1]).unwrap();
        let err = evaluate(Value::Tensor(tensor), &[Value::from("rows")]).unwrap_err();
        assert!(err.contains("2-D matrix"));
    }

    // The embedded documentation must contain at least one example block.
    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }

    // "rows" on a 0x3 matrix keeps the column count and returns empty outputs.
    #[test]
    fn unique_handles_empty_rows() {
        let tensor = Tensor::new(Vec::new(), vec![0, 3]).unwrap();
        let eval = evaluate(Value::Tensor(tensor), &[Value::from("rows")]).expect("unique");
        let (values, ia, ic) = eval.into_triple();
        match values {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![0, 3]);
                assert!(t.data.is_empty());
            }
            other => panic!("unexpected values {other:?}"),
        }
        match ia {
            Value::Tensor(t) => assert!(t.data.is_empty()),
            other => panic!("unexpected IA {other:?}"),
        }
        match ic {
            Value::Tensor(t) => assert!(t.data.is_empty()),
            other => panic!("unexpected IC {other:?}"),
        }
    }

    // An integer scalar is accepted and comes back as a numeric scalar.
    #[test]
    fn unique_accepts_integer_scalars() {
        let eval = evaluate(Value::Int(IntValue::I32(42)), &[]).expect("unique");
        let values = eval.into_values_value();
        match values {
            Value::Num(n) => assert_eq!(n, 42.0),
            other => panic!("unexpected values {other:?}"),
        }
    }
}