vortex_array/compute/conformance/
consistency.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! # Array Consistency Tests
5//!
6//! This module contains tests that verify consistency between related compute operations
7//! on Vortex arrays. These tests ensure that different ways of achieving the same result
8//! produce identical outputs.
9//!
10//! ## Test Categories
11//!
12//! - **Filter/Take Consistency**: Verifies that filtering with a mask produces the same
13//!   result as taking with the indices where the mask is true.
14//! - **Mask Composition**: Ensures that applying multiple masks sequentially produces
15//!   the same result as applying a combined mask.
16//! - **Identity Operations**: Tests that operations with identity inputs (all-true masks,
17//!   sequential indices) preserve the original array.
18//! - **Null Handling**: Verifies consistent behavior when operations introduce or
19//!   interact with null values.
20//! - **Edge Cases**: Tests empty arrays, single elements, and boundary conditions.
21
22use vortex_buffer::BitBuffer;
23use vortex_dtype::{DType, Nullability, PType};
24use vortex_error::{VortexUnwrap, vortex_panic};
25use vortex_mask::Mask;
26
27use crate::arrays::{BoolArray, PrimitiveArray};
28use crate::compute::{Operator, and, cast, compare, filter, invert, mask, or, take};
29use crate::{Array, IntoArray};
30
31/// Tests that filter and take operations produce consistent results.
32///
33/// # Invariant
34/// `filter(array, mask)` should equal `take(array, indices_where_mask_is_true)`
35///
36/// # Test Details
37/// - Creates a mask that keeps elements where index % 3 != 1
38/// - Applies filter with this mask
39/// - Creates indices array containing positions where mask is true
40/// - Applies take with these indices
41/// - Verifies both results are identical
42fn test_filter_take_consistency(array: &dyn Array) {
43    let len = array.len();
44    if len == 0 {
45        return;
46    }
47
48    // Create a test mask (keep elements where index % 3 != 1)
49    let mask_pattern: BitBuffer = (0..len).map(|i| i % 3 != 1).collect();
50    let mask = Mask::from_buffer(mask_pattern.clone());
51
52    // Filter the array
53    let filtered = filter(array, &mask).vortex_unwrap();
54
55    // Create indices where mask is true
56    let indices: Vec<u64> = mask_pattern
57        .iter()
58        .enumerate()
59        .filter_map(|(i, v)| v.then_some(i as u64))
60        .collect();
61    let indices_array = PrimitiveArray::from_iter(indices).into_array();
62
63    // Take using those indices
64    let taken = take(array, &indices_array).vortex_unwrap();
65
66    // Results should be identical
67    assert_eq!(
68        filtered.len(),
69        taken.len(),
70        "Filter and take should produce arrays of the same length. \
71         Filtered length: {}, Taken length: {}",
72        filtered.len(),
73        taken.len()
74    );
75
76    for i in 0..filtered.len() {
77        let filtered_val = filtered.scalar_at(i);
78        let taken_val = taken.scalar_at(i);
79        assert_eq!(
80            filtered_val, taken_val,
81            "Filter and take produced different values at index {i}. \
82             Filtered value: {filtered_val:?}, Taken value: {taken_val:?}"
83        );
84    }
85}
86
87/// Tests that double masking is consistent with combined mask.
88///
89/// # Invariant
90/// `mask(mask(array, mask1), mask2)` should equal `mask(array, mask1 | mask2)`
91///
92/// # Test Details
93/// - Creates two masks: mask1 (every 3rd element) and mask2 (every 2nd element)
94/// - Applies masks sequentially: first mask1, then mask2 on the result
95/// - Creates a combined mask using OR operation (element is masked if either mask is true)
96/// - Applies the combined mask directly to the original array
97/// - Verifies both approaches produce identical results
98///
99/// # Why This Matters
100/// This test ensures that mask operations compose correctly, which is critical for
101/// complex query operations that may apply multiple filters.
102fn test_double_mask_consistency(array: &dyn Array) {
103    let len = array.len();
104    if len == 0 {
105        return;
106    }
107
108    // Create two different mask patterns
109    let mask1: Mask = (0..len).map(|i| i % 3 == 0).collect();
110    let mask2: Mask = (0..len).map(|i| i % 2 == 0).collect();
111
112    // Apply masks sequentially
113    let first_masked = mask(array, &mask1).vortex_unwrap();
114    let double_masked = mask(&first_masked, &mask2).vortex_unwrap();
115
116    // Create combined mask (OR operation - element is masked if EITHER mask is true)
117    let combined_pattern: BitBuffer = mask1
118        .to_bit_buffer()
119        .iter()
120        .zip(mask2.to_bit_buffer().iter())
121        .map(|(a, b)| a || b)
122        .collect();
123    let combined_mask = Mask::from_buffer(combined_pattern);
124
125    // Apply combined mask directly
126    let directly_masked = mask(array, &combined_mask).vortex_unwrap();
127
128    // Results should be identical
129    assert_eq!(
130        double_masked.len(),
131        directly_masked.len(),
132        "Sequential masking and combined masking should produce arrays of the same length. \
133         Sequential length: {}, Combined length: {}",
134        double_masked.len(),
135        directly_masked.len()
136    );
137
138    for i in 0..double_masked.len() {
139        let double_val = double_masked.scalar_at(i);
140        let direct_val = directly_masked.scalar_at(i);
141        assert_eq!(
142            double_val, direct_val,
143            "Sequential masking and combined masking produced different values at index {i}. \
144             Sequential masking value: {double_val:?}, Combined masking value: {direct_val:?}\n\
145             This likely indicates an issue with how masks are composed in the array implementation."
146        );
147    }
148}
149
150/// Tests that filtering with an all-true mask preserves the array.
151///
152/// # Invariant
153/// `filter(array, all_true_mask)` should equal `array`
154///
155/// # Test Details
156/// - Creates a mask with all elements set to true
157/// - Applies filter with this mask
158/// - Verifies the result is identical to the original array
159///
160/// # Why This Matters
161/// This is an identity operation that should be optimized in implementations
162/// to avoid unnecessary copying.
163fn test_filter_identity(array: &dyn Array) {
164    let len = array.len();
165    if len == 0 {
166        return;
167    }
168
169    let all_true_mask = Mask::new_true(len);
170    let filtered = filter(array, &all_true_mask).vortex_unwrap();
171
172    // Filtered array should be identical to original
173    assert_eq!(
174        filtered.len(),
175        array.len(),
176        "Filtering with all-true mask should preserve array length. \
177         Original length: {}, Filtered length: {}",
178        array.len(),
179        filtered.len()
180    );
181
182    for i in 0..len {
183        let original_val = array.scalar_at(i);
184        let filtered_val = filtered.scalar_at(i);
185        assert_eq!(
186            filtered_val, original_val,
187            "Filtering with all-true mask should preserve all values. \
188             Value at index {i} changed from {original_val:?} to {filtered_val:?}"
189        );
190    }
191}
192
193/// Tests that masking with an all-false mask preserves values while making them nullable.
194///
195/// # Invariant
196/// `mask(array, all_false_mask)` should have same values as `array` but with nullable type
197///
198/// # Test Details
199/// - Creates a mask with all elements set to false (no elements are nullified)
200/// - Applies mask operation
201/// - Verifies all values are preserved but the array type becomes nullable
202///
203/// # Why This Matters
204/// Masking always produces a nullable array, even when no values are actually masked.
205/// This test ensures the type system handles this correctly.
206fn test_mask_identity(array: &dyn Array) {
207    let len = array.len();
208    if len == 0 {
209        return;
210    }
211
212    let all_false_mask = Mask::new_false(len);
213    let masked = mask(array, &all_false_mask).vortex_unwrap();
214
215    // Masked array should have same values (just nullable)
216    assert_eq!(
217        masked.len(),
218        array.len(),
219        "Masking with all-false mask should preserve array length. \
220         Original length: {}, Masked length: {}",
221        array.len(),
222        masked.len()
223    );
224
225    assert!(
226        masked.dtype().is_nullable(),
227        "Mask operation should always produce a nullable array, but dtype is {:?}",
228        masked.dtype()
229    );
230
231    for i in 0..len {
232        let original_val = array.scalar_at(i);
233        let masked_val = masked.scalar_at(i);
234        let expected_val = original_val.clone().into_nullable();
235        assert_eq!(
236            masked_val, expected_val,
237            "Masking with all-false mask should preserve values (as nullable). \
238             Value at index {i}: original = {original_val:?}, masked = {masked_val:?}, expected = {expected_val:?}"
239        );
240    }
241}
242
243/// Tests that slice and filter with contiguous mask produce same results.
244///
245/// # Invariant
246/// `filter(array, contiguous_true_mask)` should equal `slice(array, start, end)`
247///
248/// # Test Details
249/// - Creates a mask that is true only for indices 1, 2, and 3
250/// - Filters the array with this mask
251/// - Slices the array from index 1 to 4
252/// - Verifies both operations produce identical results
253///
254/// # Why This Matters
255/// When a filter mask represents a contiguous range, it should be equivalent to
256/// a slice operation. Some implementations may optimize this case.
257fn test_slice_filter_consistency(array: &dyn Array) {
258    let len = array.len();
259    if len < 4 {
260        return; // Need at least 4 elements for meaningful test
261    }
262
263    // Create a contiguous mask (true from index 1 to 3)
264    let mut mask_pattern = vec![false; len];
265    mask_pattern[1..4.min(len)].fill(true);
266
267    let mask = Mask::from_iter(mask_pattern);
268    let filtered = filter(array, &mask).vortex_unwrap();
269
270    // Slice should produce the same result
271    let sliced = array.slice(1..4.min(len));
272
273    assert_eq!(
274        filtered.len(),
275        sliced.len(),
276        "Filter with contiguous mask and slice should produce same length. \
277         Filtered length: {}, Sliced length: {}",
278        filtered.len(),
279        sliced.len()
280    );
281
282    for i in 0..filtered.len() {
283        let filtered_val = filtered.scalar_at(i);
284        let sliced_val = sliced.scalar_at(i);
285        assert_eq!(
286            filtered_val, sliced_val,
287            "Filter with contiguous mask and slice produced different values at index {i}. \
288             Filtered value: {filtered_val:?}, Sliced value: {sliced_val:?}"
289        );
290    }
291}
292
293/// Tests that take with sequential indices equals slice.
294///
295/// # Invariant
296/// `take(array, [1, 2, 3, ...])` should equal `slice(array, 1, n)`
297///
298/// # Test Details
299/// - Creates indices array with sequential values [1, 2, 3]
300/// - Takes elements at these indices
301/// - Slices array from index 1 to 4
302/// - Verifies both operations produce identical results
303///
304/// # Why This Matters
305/// Sequential takes are a common pattern that can be optimized to slice operations.
306fn test_take_slice_consistency(array: &dyn Array) {
307    let len = array.len();
308    if len < 3 {
309        return; // Need at least 3 elements
310    }
311
312    // Take indices [1, 2, 3]
313    let end = 4.min(len);
314    let indices = PrimitiveArray::from_iter((1..end).map(|i| i as u64)).into_array();
315    let taken = take(array, &indices).vortex_unwrap();
316
317    // Slice from 1 to end
318    let sliced = array.slice(1..end);
319
320    assert_eq!(
321        taken.len(),
322        sliced.len(),
323        "Take with sequential indices and slice should produce same length. \
324         Taken length: {}, Sliced length: {}",
325        taken.len(),
326        sliced.len()
327    );
328
329    for i in 0..taken.len() {
330        let taken_val = taken.scalar_at(i);
331        let sliced_val = sliced.scalar_at(i);
332        assert_eq!(
333            taken_val, sliced_val,
334            "Take with sequential indices and slice produced different values at index {i}. \
335             Taken value: {taken_val:?}, Sliced value: {sliced_val:?}"
336        );
337    }
338}
339
340/// Tests that filter preserves relative ordering
341fn test_filter_preserves_order(array: &dyn Array) {
342    let len = array.len();
343    if len < 4 {
344        return;
345    }
346
347    // Create a mask that selects elements at indices 0, 2, 3
348    let mask_pattern: Vec<bool> = (0..len).map(|i| i == 0 || i == 2 || i == 3).collect();
349    let mask = Mask::from_iter(mask_pattern);
350
351    let filtered = filter(array, &mask).vortex_unwrap();
352
353    // Verify the filtered array contains the right elements in order
354    assert_eq!(filtered.len(), 3.min(len));
355    if len >= 4 {
356        assert_eq!(filtered.scalar_at(0), array.scalar_at(0));
357        assert_eq!(filtered.scalar_at(1), array.scalar_at(2),);
358        assert_eq!(filtered.scalar_at(2), array.scalar_at(3));
359    }
360}
361
362/// Tests that take with repeated indices works correctly
363fn test_take_repeated_indices(array: &dyn Array) {
364    let len = array.len();
365    if len == 0 {
366        return;
367    }
368
369    // Take the first element three times
370    let indices = PrimitiveArray::from_iter([0u64, 0, 0]).into_array();
371    let taken = take(array, &indices).vortex_unwrap();
372
373    assert_eq!(taken.len(), 3);
374    for i in 0..3 {
375        assert_eq!(taken.scalar_at(i), array.scalar_at(0),);
376    }
377}
378
379/// Tests mask and filter interaction with nulls
380fn test_mask_filter_null_consistency(array: &dyn Array) {
381    let len = array.len();
382    if len < 3 {
383        return;
384    }
385
386    // First mask some elements
387    let mask_pattern: Vec<bool> = (0..len).map(|i| i % 2 == 0).collect();
388    let mask_array = Mask::from_iter(mask_pattern);
389    let masked = mask(array, &mask_array).vortex_unwrap();
390
391    // Then filter to remove the nulls
392    let filter_pattern: Vec<bool> = (0..len).map(|i| i % 2 != 0).collect();
393    let filter_mask = Mask::from_iter(filter_pattern);
394    let filtered = filter(&masked, &filter_mask).vortex_unwrap();
395
396    // This should be equivalent to directly filtering the original array
397    let direct_filtered = filter(array, &filter_mask).vortex_unwrap();
398
399    assert_eq!(filtered.len(), direct_filtered.len());
400    for i in 0..filtered.len() {
401        assert_eq!(filtered.scalar_at(i), direct_filtered.scalar_at(i));
402    }
403}
404
405/// Tests that empty operations are consistent
406fn test_empty_operations_consistency(array: &dyn Array) {
407    let len = array.len();
408
409    // Empty filter
410    let empty_filter = filter(array, &Mask::new_false(len)).vortex_unwrap();
411    assert_eq!(empty_filter.len(), 0);
412    assert_eq!(empty_filter.dtype(), array.dtype());
413
414    // Empty take
415    let empty_indices = PrimitiveArray::empty::<u64>(Nullability::NonNullable).into_array();
416    let empty_take = take(array, &empty_indices).vortex_unwrap();
417    assert_eq!(empty_take.len(), 0);
418    assert_eq!(empty_take.dtype(), array.dtype());
419
420    // Empty slice (if array is non-empty)
421    if len > 0 {
422        let empty_slice = array.slice(0..0);
423        assert_eq!(empty_slice.len(), 0);
424        assert_eq!(empty_slice.dtype(), array.dtype());
425    }
426}
427
428/// Tests that take preserves array properties
429fn test_take_preserves_properties(array: &dyn Array) {
430    let len = array.len();
431    if len == 0 {
432        return;
433    }
434
435    // Take all elements in original order
436    let indices = PrimitiveArray::from_iter((0..len).map(|i| i as u64)).into_array();
437    let taken = take(array, &indices).vortex_unwrap();
438
439    // Should be identical to original
440    assert_eq!(taken.len(), array.len());
441    assert_eq!(taken.dtype(), array.dtype());
442    for i in 0..len {
443        assert_eq!(taken.scalar_at(i), array.scalar_at(i),);
444    }
445}
446
447/// Tests consistency with nullable indices.
448///
449/// # Invariant
450/// `take(array, [Some(0), None, Some(2)])` should produce `[array[0], null, array[2]]`
451///
452/// # Test Details
453/// - Creates an indices array with null at position 1: `[Some(0), None, Some(2)]`
454/// - Takes elements using these indices
455/// - Verifies that:
456///   - Position 0 contains the value from array index 0
457///   - Position 1 contains null
458///   - Position 2 contains the value from array index 2
459///   - The result array has nullable type
460///
461/// # Why This Matters
462/// Nullable indices are a powerful feature that allows introducing nulls during
463/// a take operation, which is useful for outer joins and similar operations.
464fn test_nullable_indices_consistency(array: &dyn Array) {
465    let len = array.len();
466    if len < 3 {
467        return; // Need at least 3 elements to test indices 0 and 2
468    }
469
470    // Create nullable indices where some indices are null
471    let indices = PrimitiveArray::from_option_iter([Some(0u64), None, Some(2u64)]).into_array();
472
473    let taken = take(array, &indices).vortex_unwrap();
474
475    // Result should have nulls where indices were null
476    assert_eq!(
477        taken.len(),
478        3,
479        "Take with nullable indices should produce array of length 3, got {}",
480        taken.len()
481    );
482
483    assert!(
484        taken.dtype().is_nullable(),
485        "Take with nullable indices should produce nullable array, but dtype is {:?}",
486        taken.dtype()
487    );
488
489    // Check first element (from index 0)
490    let expected_0 = array.scalar_at(0).into_nullable();
491    let actual_0 = taken.scalar_at(0);
492    assert_eq!(
493        actual_0, expected_0,
494        "Take with nullable indices: element at position 0 should be from array index 0. \
495         Expected: {expected_0:?}, Actual: {actual_0:?}"
496    );
497
498    // Check second element (should be null)
499    let actual_1 = taken.scalar_at(1);
500    assert!(
501        actual_1.is_null(),
502        "Take with nullable indices: element at position 1 should be null, but got {actual_1:?}"
503    );
504
505    // Check third element (from index 2)
506    let expected_2 = array.scalar_at(2).into_nullable();
507    let actual_2 = taken.scalar_at(2);
508    assert_eq!(
509        actual_2, expected_2,
510        "Take with nullable indices: element at position 2 should be from array index 2. \
511         Expected: {expected_2:?}, Actual: {actual_2:?}"
512    );
513}
514
515/// Tests large array consistency
516fn test_large_array_consistency(array: &dyn Array) {
517    let len = array.len();
518    if len < 1000 {
519        return;
520    }
521
522    // Test with every 10th element
523    let indices: Vec<u64> = (0..len).step_by(10).map(|i| i as u64).collect();
524    let indices_array = PrimitiveArray::from_iter(indices).into_array();
525    let taken = take(array, &indices_array).vortex_unwrap();
526
527    // Create equivalent filter mask
528    let mask_pattern: Vec<bool> = (0..len).map(|i| i % 10 == 0).collect();
529    let mask = Mask::from_iter(mask_pattern);
530    let filtered = filter(array, &mask).vortex_unwrap();
531
532    // Results should match
533    assert_eq!(taken.len(), filtered.len());
534    for i in 0..taken.len() {
535        assert_eq!(taken.scalar_at(i), filtered.scalar_at(i),);
536    }
537}
538
539/// Tests that comparison operations follow inverse relationships.
540///
541/// # Invariants
542/// - `compare(array, value, Eq)` is the inverse of `compare(array, value, NotEq)`
543/// - `compare(array, value, Gt)` is the inverse of `compare(array, value, Lte)`
544/// - `compare(array, value, Lt)` is the inverse of `compare(array, value, Gte)`
545///
546/// # Test Details
547/// - Creates comparison results for each operator
548/// - Verifies that inverse operations produce opposite boolean values
549/// - Tests with multiple scalar values to ensure consistency
550///
551/// # Why This Matters
552/// Comparison operations must maintain logical consistency across encodings.
553/// This test catches bugs where an encoding might implement one comparison
554/// correctly but fail on its logical inverse.
555fn test_comparison_inverse_consistency(array: &dyn Array) {
556    let len = array.len();
557    if len == 0 {
558        return;
559    }
560
561    // Skip non-comparable types.
562    match array.dtype() {
563        DType::Null | DType::Extension(_) | DType::Struct(..) | DType::List(..) => return,
564        _ => {}
565    }
566
567    // Get a test value from the middle of the array
568    let test_scalar = if len == 0 {
569        return;
570    } else {
571        array.scalar_at(len / 2)
572    };
573
574    // Test Eq vs NotEq
575    let const_array = crate::arrays::ConstantArray::new(test_scalar, len);
576    if let (Ok(eq_result), Ok(neq_result)) = (
577        compare(array, const_array.as_ref(), Operator::Eq),
578        compare(array, const_array.as_ref(), Operator::NotEq),
579    ) {
580        let inverted_eq = invert(&eq_result).vortex_unwrap();
581
582        assert_eq!(
583            inverted_eq.len(),
584            neq_result.len(),
585            "Inverted Eq should have same length as NotEq"
586        );
587
588        for i in 0..inverted_eq.len() {
589            let inv_val = inverted_eq.scalar_at(i);
590            let neq_val = neq_result.scalar_at(i);
591            assert_eq!(
592                inv_val, neq_val,
593                "At index {i}: NOT(Eq) should equal NotEq. \
594                 NOT(Eq) = {inv_val:?}, NotEq = {neq_val:?}"
595            );
596        }
597    }
598
599    // Test Gt vs Lte
600    if let (Ok(gt_result), Ok(lte_result)) = (
601        compare(array, const_array.as_ref(), Operator::Gt),
602        compare(array, const_array.as_ref(), Operator::Lte),
603    ) {
604        let inverted_gt = invert(&gt_result).vortex_unwrap();
605
606        for i in 0..inverted_gt.len() {
607            let inv_val = inverted_gt.scalar_at(i);
608            let lte_val = lte_result.scalar_at(i);
609            assert_eq!(
610                inv_val, lte_val,
611                "At index {i}: NOT(Gt) should equal Lte. \
612                 NOT(Gt) = {inv_val:?}, Lte = {lte_val:?}"
613            );
614        }
615    }
616
617    // Test Lt vs Gte
618    if let (Ok(lt_result), Ok(gte_result)) = (
619        compare(array, const_array.as_ref(), Operator::Lt),
620        compare(array, const_array.as_ref(), Operator::Gte),
621    ) {
622        let inverted_lt = invert(&lt_result).vortex_unwrap();
623
624        for i in 0..inverted_lt.len() {
625            let inv_val = inverted_lt.scalar_at(i);
626            let gte_val = gte_result.scalar_at(i);
627            assert_eq!(
628                inv_val, gte_val,
629                "At index {i}: NOT(Lt) should equal Gte. \
630                 NOT(Lt) = {inv_val:?}, Gte = {gte_val:?}"
631            );
632        }
633    }
634}
635
636/// Tests that comparison operations maintain proper symmetry relationships.
637///
638/// # Invariants
639/// - `compare(array, value, Gt)` should equal `compare_scalar_array(value, array, Lt)`
640/// - `compare(array, value, Lt)` should equal `compare_scalar_array(value, array, Gt)`
641/// - `compare(array, value, Eq)` should equal `compare_scalar_array(value, array, Eq)`
642///
643/// # Test Details
644/// - Compares array-scalar operations with their symmetric scalar-array versions
645/// - Verifies that ordering relationships are properly reversed
646/// - Tests equality which should be symmetric
647///
648/// # Why This Matters
649/// Ensures that comparison operations maintain mathematical ordering properties
650/// regardless of operand order.
651fn test_comparison_symmetry_consistency(array: &dyn Array) {
652    let len = array.len();
653    if len == 0 {
654        return;
655    }
656
657    // Skip non-comparable types.
658    match array.dtype() {
659        DType::Null | DType::Extension(_) | DType::Struct(..) | DType::List(..) => return,
660        _ => {}
661    }
662
663    // Get test values
664    let test_scalar = if len == 2 {
665        return;
666    } else {
667        array.scalar_at(len / 2)
668    };
669
670    // Create a constant array with the test scalar for reverse comparison
671    let const_array = crate::arrays::ConstantArray::new(test_scalar, len);
672
673    // Test Gt vs Lt symmetry
674    if let (Ok(arr_gt_scalar), Ok(scalar_lt_arr)) = (
675        compare(array, const_array.as_ref(), Operator::Gt),
676        compare(const_array.as_ref(), array, Operator::Lt),
677    ) {
678        assert_eq!(
679            arr_gt_scalar.len(),
680            scalar_lt_arr.len(),
681            "Symmetric comparisons should have same length"
682        );
683
684        for i in 0..arr_gt_scalar.len() {
685            let arr_gt = arr_gt_scalar.scalar_at(i);
686            let scalar_lt = scalar_lt_arr.scalar_at(i);
687            assert_eq!(
688                arr_gt, scalar_lt,
689                "At index {i}: (array > scalar) should equal (scalar < array). \
690                 array > scalar = {arr_gt:?}, scalar < array = {scalar_lt:?}"
691            );
692        }
693    }
694
695    // Test Eq symmetry
696    if let (Ok(arr_eq_scalar), Ok(scalar_eq_arr)) = (
697        compare(array, const_array.as_ref(), Operator::Eq),
698        compare(const_array.as_ref(), array, Operator::Eq),
699    ) {
700        for i in 0..arr_eq_scalar.len() {
701            let arr_eq = arr_eq_scalar.scalar_at(i);
702            let scalar_eq = scalar_eq_arr.scalar_at(i);
703            assert_eq!(
704                arr_eq, scalar_eq,
705                "At index {i}: (array == scalar) should equal (scalar == array). \
706                 array == scalar = {arr_eq:?}, scalar == array = {scalar_eq:?}"
707            );
708        }
709    }
710}
711
712/// Tests that boolean operations follow De Morgan's laws.
713///
714/// # Invariants
715/// - `NOT(A AND B)` equals `(NOT A) OR (NOT B)`
716/// - `NOT(A OR B)` equals `(NOT A) AND (NOT B)`
717///
718/// # Test Details
719/// - If the array is boolean, uses it directly for testing boolean operations
720/// - Creates two boolean masks from patterns based on the array
721/// - Computes AND/OR operations and their inversions
722/// - Verifies De Morgan's laws hold for all elements
723///
724/// # Why This Matters
725/// Boolean operations must maintain logical consistency across encodings.
726/// This test catches bugs where encodings might optimize boolean operations
727/// incorrectly, breaking fundamental logical properties.
728fn test_boolean_demorgan_consistency(array: &dyn Array) {
729    if !matches!(array.dtype(), DType::Bool(_)) {
730        return;
731    }
732
733    let mask = {
734        let mask_pattern: Vec<bool> = (0..array.len()).map(|i| i % 3 == 0).collect();
735        BoolArray::from_iter(mask_pattern)
736    };
737    let mask = mask.as_ref();
738
739    // Test first De Morgan's law: NOT(A AND B) = (NOT A) OR (NOT B)
740    if let (Ok(a_and_b), Ok(not_a), Ok(not_b)) = (and(array, mask), invert(array), invert(mask)) {
741        let not_a_and_b = invert(&a_and_b).vortex_unwrap();
742        let not_a_or_not_b = or(&not_a, &not_b).vortex_unwrap();
743
744        assert_eq!(
745            not_a_and_b.len(),
746            not_a_or_not_b.len(),
747            "De Morgan's law results should have same length"
748        );
749
750        for i in 0..not_a_and_b.len() {
751            let left = not_a_and_b.scalar_at(i);
752            let right = not_a_or_not_b.scalar_at(i);
753            assert_eq!(
754                left, right,
755                "De Morgan's first law failed at index {i}: \
756                 NOT(A AND B) = {left:?}, (NOT A) OR (NOT B) = {right:?}"
757            );
758        }
759    }
760
761    // Test second De Morgan's law: NOT(A OR B) = (NOT A) AND (NOT B)
762    if let (Ok(a_or_b), Ok(not_a), Ok(not_b)) = (or(array, mask), invert(array), invert(mask)) {
763        let not_a_or_b = invert(&a_or_b).vortex_unwrap();
764        let not_a_and_not_b = and(&not_a, &not_b).vortex_unwrap();
765
766        for i in 0..not_a_or_b.len() {
767            let left = not_a_or_b.scalar_at(i);
768            let right = not_a_and_not_b.scalar_at(i);
769            assert_eq!(
770                left, right,
771                "De Morgan's second law failed at index {i}: \
772                 NOT(A OR B) = {left:?}, (NOT A) AND (NOT B) = {right:?}"
773            );
774        }
775    }
776}
777
778/// Tests that slice and aggregate operations produce consistent results.
779///
780/// # Invariants
781/// - Aggregating a sliced array should equal aggregating the corresponding
782///   elements from the canonical form
783/// - This applies to sum, count, min/max, and other aggregate functions
784///
785/// # Test Details
786/// - Slices the array and computes aggregates
787/// - Compares against aggregating the canonical form's slice
788/// - Tests multiple aggregate functions where applicable
789///
790/// # Why This Matters
791/// Aggregate operations on sliced arrays must produce correct results
792/// regardless of the underlying encoding's offset handling.
793fn test_slice_aggregate_consistency(array: &dyn Array) {
794    use vortex_dtype::DType;
795
796    use crate::compute::{min_max, nan_count, sum};
797
798    let len = array.len();
799    if len < 5 {
800        return; // Need enough elements for meaningful slice
801    }
802
803    // Define slice bounds
804    let start = 1;
805    let end = (len - 1).min(start + 10); // Take up to 10 elements
806
807    // Get sliced array and canonical slice
808    let sliced = array.slice(start..end);
809    let canonical = array.to_canonical();
810    let canonical_sliced = canonical.as_ref().slice(start..end);
811
812    // Test null count through invalid_count
813    assert_eq!(
814        sliced.invalid_count(),
815        canonical_sliced.invalid_count(),
816        "null_count on sliced array should match canonical. \
817             Sliced: {}, Canonical: {}",
818        sliced.invalid_count(),
819        canonical_sliced.invalid_count()
820    );
821
822    // Test sum for numeric types
823    if !matches!(array.dtype(), DType::Primitive(..)) {
824        return;
825    }
826
827    if let (Ok(slice_sum), Ok(canonical_sum)) = (sum(&sliced), sum(&canonical_sliced)) {
828        // Compare sum scalars
829        assert_eq!(
830            slice_sum, canonical_sum,
831            "sum on sliced array should match canonical. \
832                 Sliced: {slice_sum:?}, Canonical: {canonical_sum:?}"
833        );
834    }
835
836    // Test min_max
837    if let (Ok(slice_minmax), Ok(canonical_minmax)) = (min_max(&sliced), min_max(&canonical_sliced))
838    {
839        match (slice_minmax, canonical_minmax) {
840            (Some(s_result), Some(c_result)) => {
841                assert_eq!(
842                    s_result.min, c_result.min,
843                    "min on sliced array should match canonical. \
844                         Sliced: {:?}, Canonical: {:?}",
845                    s_result.min, c_result.min
846                );
847                assert_eq!(
848                    s_result.max, c_result.max,
849                    "max on sliced array should match canonical. \
850                         Sliced: {:?}, Canonical: {:?}",
851                    s_result.max, c_result.max
852                );
853            }
854            (None, None) => {} // Both empty, OK
855            _ => vortex_panic!("min_max results don't match"),
856        }
857    }
858
859    // Test nan_count for floating point types
860    if array.dtype().is_float()
861        && let (Ok(slice_nan_count), Ok(canonical_nan_count)) =
862            (nan_count(&sliced), nan_count(&canonical_sliced))
863    {
864        assert_eq!(
865            slice_nan_count, canonical_nan_count,
866            "nan_count on sliced array should match canonical. \
867                 Sliced: {slice_nan_count}, Canonical: {canonical_nan_count}"
868        );
869    }
870}
871
872/// Tests that cast operations preserve array properties when sliced.
873///
874/// # Invariant
875/// `cast(slice(array, start, end), dtype)` should equal `slice(cast(array, dtype), start, end)`
876///
877/// # Test Details
878/// - Slices the array from index 2 to 7 (or len-2 if smaller)
879/// - Casts the sliced array to a different type
880/// - Compares against the canonical form of the array (without slicing or casting the canonical form)
881/// - Verifies both approaches produce identical results
882///
883/// # Why This Matters
884/// This test specifically catches bugs where encodings (like RunEndArray) fail to preserve
885/// offset information during cast operations. Such bugs can lead to incorrect data being
886/// returned after casting a sliced array.
887fn test_cast_slice_consistency(array: &dyn Array) {
888    let len = array.len();
889    if len < 5 {
890        return; // Need at least 5 elements for meaningful slice
891    }
892
893    // Define slice bounds
894    let start = 2;
895    let end = 7.min(len - 2).max(start + 1); // Ensure we have at least 1 element
896
897    // Get canonical form of the original array
898    let canonical = array.to_canonical();
899
900    // Choose appropriate target dtype based on the array's type
901    let target_dtypes = match array.dtype() {
902        DType::Null => vec![],
903        DType::Bool(nullability) => vec![
904            DType::Primitive(PType::U8, *nullability),
905            DType::Primitive(PType::I32, *nullability),
906        ],
907        DType::Primitive(ptype, nullability) => {
908            let mut targets = vec![];
909            // Test nullability changes
910            let opposite_nullability = match nullability {
911                Nullability::NonNullable => Nullability::Nullable,
912                Nullability::Nullable => Nullability::NonNullable,
913            };
914            targets.push(DType::Primitive(*ptype, opposite_nullability));
915
916            // Test widening casts
917            match ptype {
918                PType::U8 => {
919                    targets.push(DType::Primitive(PType::U16, *nullability));
920                    targets.push(DType::Primitive(PType::I16, *nullability));
921                }
922                PType::U16 => {
923                    targets.push(DType::Primitive(PType::U32, *nullability));
924                    targets.push(DType::Primitive(PType::I32, *nullability));
925                }
926                PType::U32 => {
927                    targets.push(DType::Primitive(PType::U64, *nullability));
928                    targets.push(DType::Primitive(PType::I64, *nullability));
929                }
930                PType::U64 => {
931                    targets.push(DType::Primitive(PType::F64, *nullability));
932                }
933                PType::I8 => {
934                    targets.push(DType::Primitive(PType::I16, *nullability));
935                    targets.push(DType::Primitive(PType::F32, *nullability));
936                }
937                PType::I16 => {
938                    targets.push(DType::Primitive(PType::I32, *nullability));
939                    targets.push(DType::Primitive(PType::F32, *nullability));
940                }
941                PType::I32 => {
942                    targets.push(DType::Primitive(PType::I64, *nullability));
943                    targets.push(DType::Primitive(PType::F64, *nullability));
944                }
945                PType::I64 => {
946                    targets.push(DType::Primitive(PType::F64, *nullability));
947                }
948                PType::F16 => {
949                    targets.push(DType::Primitive(PType::F32, *nullability));
950                }
951                PType::F32 => {
952                    targets.push(DType::Primitive(PType::F64, *nullability));
953                    targets.push(DType::Primitive(PType::I32, *nullability));
954                }
955                PType::F64 => {
956                    targets.push(DType::Primitive(PType::I64, *nullability));
957                }
958            }
959            targets
960        }
961        DType::Utf8(nullability) => {
962            let opposite = match nullability {
963                Nullability::NonNullable => Nullability::Nullable,
964                Nullability::Nullable => Nullability::NonNullable,
965            };
966            vec![DType::Utf8(opposite), DType::Binary(*nullability)]
967        }
968        DType::Binary(nullability) => {
969            let opposite = match nullability {
970                Nullability::NonNullable => Nullability::Nullable,
971                Nullability::Nullable => Nullability::NonNullable,
972            };
973            vec![
974                DType::Binary(opposite),
975                DType::Utf8(*nullability), // May fail if not valid UTF-8
976            ]
977        }
978        DType::Decimal(decimal_type, nullability) => {
979            let opposite = match nullability {
980                Nullability::NonNullable => Nullability::Nullable,
981                Nullability::Nullable => Nullability::NonNullable,
982            };
983            vec![DType::Decimal(*decimal_type, opposite)]
984        }
985        DType::Struct(fields, nullability) => {
986            let opposite = match nullability {
987                Nullability::NonNullable => Nullability::Nullable,
988                Nullability::Nullable => Nullability::NonNullable,
989            };
990            vec![DType::Struct(fields.clone(), opposite)]
991        }
992        DType::List(element_type, nullability) => {
993            let opposite = match nullability {
994                Nullability::NonNullable => Nullability::Nullable,
995                Nullability::Nullable => Nullability::NonNullable,
996            };
997            vec![DType::List(element_type.clone(), opposite)]
998        }
999        DType::FixedSizeList(element_type, list_size, nullability) => {
1000            let opposite = match nullability {
1001                Nullability::NonNullable => Nullability::Nullable,
1002                Nullability::Nullable => Nullability::NonNullable,
1003            };
1004            vec![DType::FixedSizeList(
1005                element_type.clone(),
1006                *list_size,
1007                opposite,
1008            )]
1009        }
1010        DType::Extension(_) => vec![], // Extension types typically only cast to themselves
1011    };
1012
1013    // Test each target dtype
1014    for target_dtype in target_dtypes {
1015        // Slice the array
1016        let sliced = array.slice(start..end);
1017
1018        // Try to cast the sliced array
1019        let slice_then_cast = match cast(&sliced, &target_dtype) {
1020            Ok(result) => result,
1021            Err(_) => continue, // Skip if cast fails
1022        };
1023
1024        // Verify against canonical form
1025        assert_eq!(
1026            slice_then_cast.len(),
1027            end - start,
1028            "Sliced and casted array should have length {}, but has {}",
1029            end - start,
1030            slice_then_cast.len()
1031        );
1032
1033        // Compare each value against the canonical form
1034        for i in 0..slice_then_cast.len() {
1035            let slice_cast_val = slice_then_cast.scalar_at(i);
1036
1037            // Get the corresponding value from the canonical array (adjusted for slice offset)
1038            let canonical_val = canonical.as_ref().scalar_at(start + i);
1039
1040            // Cast the canonical scalar to the target dtype
1041            let expected_val = match canonical_val.cast(&target_dtype) {
1042                Ok(val) => val,
1043                Err(_) => {
1044                    // If scalar cast fails, we can't compare - skip this target dtype
1045                    // This can happen for some type conversions that aren't supported at scalar level
1046                    break;
1047                }
1048            };
1049
1050            assert_eq!(
1051                slice_cast_val,
1052                expected_val,
1053                "Cast of sliced array produced incorrect value at index {i}. \
1054                 Got: {slice_cast_val:?}, Expected: {expected_val:?} \
1055                 (canonical value at index {}: {canonical_val:?})\n\
1056                 This likely indicates the array encoding doesn't preserve offset information during cast.",
1057                start + i
1058            );
1059        }
1060
1061        // Also test the other way: cast then slice
1062        let casted = match cast(array, &target_dtype) {
1063            Ok(result) => result,
1064            Err(_) => continue, // Skip if cast fails
1065        };
1066        let cast_then_slice = casted.slice(start..end);
1067
1068        // Verify the two approaches produce identical results
1069        assert_eq!(
1070            slice_then_cast.len(),
1071            cast_then_slice.len(),
1072            "Slice-then-cast and cast-then-slice should produce arrays of the same length"
1073        );
1074
1075        for i in 0..slice_then_cast.len() {
1076            let slice_cast_val = slice_then_cast.scalar_at(i);
1077            let cast_slice_val = cast_then_slice.scalar_at(i);
1078            assert_eq!(
1079                slice_cast_val, cast_slice_val,
1080                "Slice-then-cast and cast-then-slice produced different values at index {i}. \
1081                 Slice-then-cast: {slice_cast_val:?}, Cast-then-slice: {cast_slice_val:?}"
1082            );
1083        }
1084    }
1085}
1086
1087/// Run all consistency tests on an array.
1088///
1089/// This function executes a comprehensive suite of consistency tests that verify
1090/// the correctness of compute operations on Vortex arrays.
1091///
1092/// # Test Suite Overview
1093///
1094/// ## Core Operation Consistency
1095/// - **Filter/Take**: Verifies `filter(array, mask)` equals `take(array, true_indices)`
1096/// - **Mask Composition**: Ensures sequential masks equal combined masks
1097/// - **Slice/Filter**: Checks contiguous filters equal slice operations
1098/// - **Take/Slice**: Validates sequential takes equal slice operations
1099/// - **Cast/Slice**: Ensures cast operations preserve sliced array properties
1100///
1101/// ## Boolean Operations
1102/// - **De Morgan's Laws**: Verifies boolean operations follow logical laws
1103///
1104/// ## Comparison Operations
1105/// - **Inverse Relationships**: Verifies logical inverses (Eq/NotEq, Gt/Lte, Lt/Gte)
1106/// - **Symmetry**: Ensures proper ordering relationships when operands are swapped
1107///
1108/// ## Aggregate Operations
1109/// - **Slice/Aggregate**: Verifies aggregates on sliced arrays match canonical
1110///
1111/// ## Identity Operations
1112/// - **Filter Identity**: All-true mask preserves the array
1113/// - **Mask Identity**: All-false mask preserves values (as nullable)
1114/// - **Take Identity**: Taking all indices preserves the array
1115///
1116/// ## Edge Cases
1117/// - **Empty Operations**: Empty filters, takes, and slices behave correctly
1118/// - **Single Element**: Operations work with single-element arrays
1119/// - **Repeated Indices**: Take with duplicate indices works correctly
1120///
1121/// ## Null Handling
1122/// - **Nullable Indices**: Null indices produce null values
1123/// - **Mask/Filter Interaction**: Masking then filtering behaves predictably
1124///
1125/// ## Large Arrays
1126/// - **Performance**: Operations scale correctly to large arrays (1000+ elements)
1127/// ```text
1128pub fn test_array_consistency(array: &dyn Array) {
1129    // Core operation consistency
1130    test_filter_take_consistency(array);
1131    test_double_mask_consistency(array);
1132    test_slice_filter_consistency(array);
1133    test_take_slice_consistency(array);
1134    test_cast_slice_consistency(array);
1135
1136    // Boolean operations
1137    test_boolean_demorgan_consistency(array);
1138
1139    // Comparison operations
1140    test_comparison_inverse_consistency(array);
1141    test_comparison_symmetry_consistency(array);
1142
1143    // Aggregate operations
1144    test_slice_aggregate_consistency(array);
1145
1146    // Identity operations
1147    test_filter_identity(array);
1148    test_mask_identity(array);
1149    test_take_preserves_properties(array);
1150
1151    // Ordering and correctness
1152    test_filter_preserves_order(array);
1153    test_take_repeated_indices(array);
1154
1155    // Null handling
1156    test_mask_filter_null_consistency(array);
1157    test_nullable_indices_consistency(array);
1158
1159    // Edge cases
1160    test_empty_operations_consistency(array);
1161    test_large_array_consistency(array);
1162}