vortex_array/compute/conformance/
consistency.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! # Array Consistency Tests
5//!
6//! This module contains tests that verify consistency between related compute operations
7//! on Vortex arrays. These tests ensure that different ways of achieving the same result
8//! produce identical outputs.
9//!
10//! ## Test Categories
11//!
12//! - **Filter/Take Consistency**: Verifies that filtering with a mask produces the same
13//!   result as taking with the indices where the mask is true.
14//! - **Mask Composition**: Ensures that applying multiple masks sequentially produces
15//!   the same result as applying a combined mask.
16//! - **Identity Operations**: Tests that operations with identity inputs (all-true masks,
17//!   sequential indices) preserve the original array.
18//! - **Null Handling**: Verifies consistent behavior when operations introduce or
19//!   interact with null values.
20//! - **Edge Cases**: Tests empty arrays, single elements, and boundary conditions.
21
22use arrow_buffer::BooleanBuffer;
23use vortex_buffer::buffer;
24use vortex_dtype::{DType, Nullability, PType};
25use vortex_error::{VortexUnwrap, vortex_panic};
26use vortex_mask::Mask;
27
28use crate::arrays::{BoolArray, ConstantArray, PrimitiveArray};
29use crate::compute::{Operator, and, cast, compare, filter, invert, mask, or, take};
30use crate::{Array, IntoArray};
31
32/// Tests that filter and take operations produce consistent results.
33///
34/// # Invariant
35/// `filter(array, mask)` should equal `take(array, indices_where_mask_is_true)`
36///
37/// # Test Details
38/// - Creates a mask that keeps elements where index % 3 != 1
39/// - Applies filter with this mask
40/// - Creates indices array containing positions where mask is true
41/// - Applies take with these indices
42/// - Verifies both results are identical
43fn test_filter_take_consistency(array: &dyn Array) {
44    let len = array.len();
45    if len == 0 {
46        return;
47    }
48
49    // Create a test mask (keep elements where index % 3 != 1)
50    let mask_pattern: BooleanBuffer = (0..len).map(|i| i % 3 != 1).collect();
51    let mask = Mask::from_buffer(mask_pattern.clone());
52
53    // Filter the array
54    let filtered = filter(array, &mask).vortex_unwrap();
55
56    // Create indices where mask is true
57    let indices: Vec<u64> = mask_pattern
58        .iter()
59        .enumerate()
60        .filter_map(|(i, v)| v.then_some(i as u64))
61        .collect();
62    let indices_array = PrimitiveArray::from_iter(indices).into_array();
63
64    // Take using those indices
65    let taken = take(array, &indices_array).vortex_unwrap();
66
67    // Results should be identical
68    assert_eq!(
69        filtered.len(),
70        taken.len(),
71        "Filter and take should produce arrays of the same length. \
72         Filtered length: {}, Taken length: {}",
73        filtered.len(),
74        taken.len()
75    );
76
77    for i in 0..filtered.len() {
78        let filtered_val = filtered.scalar_at(i);
79        let taken_val = taken.scalar_at(i);
80        assert_eq!(
81            filtered_val, taken_val,
82            "Filter and take produced different values at index {i}. \
83             Filtered value: {filtered_val:?}, Taken value: {taken_val:?}"
84        );
85    }
86}
87
88/// Tests that double masking is consistent with combined mask.
89///
90/// # Invariant
91/// `mask(mask(array, mask1), mask2)` should equal `mask(array, mask1 | mask2)`
92///
93/// # Test Details
94/// - Creates two masks: mask1 (every 3rd element) and mask2 (every 2nd element)
95/// - Applies masks sequentially: first mask1, then mask2 on the result
96/// - Creates a combined mask using OR operation (element is masked if either mask is true)
97/// - Applies the combined mask directly to the original array
98/// - Verifies both approaches produce identical results
99///
100/// # Why This Matters
101/// This test ensures that mask operations compose correctly, which is critical for
102/// complex query operations that may apply multiple filters.
103fn test_double_mask_consistency(array: &dyn Array) {
104    let len = array.len();
105    if len == 0 {
106        return;
107    }
108
109    // Create two different mask patterns
110    let mask1: Mask = (0..len).map(|i| i % 3 == 0).collect();
111    let mask2: Mask = (0..len).map(|i| i % 2 == 0).collect();
112
113    // Apply masks sequentially
114    let first_masked = mask(array, &mask1).vortex_unwrap();
115    let double_masked = mask(&first_masked, &mask2).vortex_unwrap();
116
117    // Create combined mask (OR operation - element is masked if EITHER mask is true)
118    let combined_pattern: BooleanBuffer = mask1
119        .to_boolean_buffer()
120        .iter()
121        .zip(mask2.to_boolean_buffer().iter())
122        .map(|(a, b)| a || b)
123        .collect();
124    let combined_mask = Mask::from_buffer(combined_pattern);
125
126    // Apply combined mask directly
127    let directly_masked = mask(array, &combined_mask).vortex_unwrap();
128
129    // Results should be identical
130    assert_eq!(
131        double_masked.len(),
132        directly_masked.len(),
133        "Sequential masking and combined masking should produce arrays of the same length. \
134         Sequential length: {}, Combined length: {}",
135        double_masked.len(),
136        directly_masked.len()
137    );
138
139    for i in 0..double_masked.len() {
140        let double_val = double_masked.scalar_at(i);
141        let direct_val = directly_masked.scalar_at(i);
142        assert_eq!(
143            double_val, direct_val,
144            "Sequential masking and combined masking produced different values at index {i}. \
145             Sequential masking value: {double_val:?}, Combined masking value: {direct_val:?}\n\
146             This likely indicates an issue with how masks are composed in the array implementation."
147        );
148    }
149}
150
151/// Tests that filtering with an all-true mask preserves the array.
152///
153/// # Invariant
154/// `filter(array, all_true_mask)` should equal `array`
155///
156/// # Test Details
157/// - Creates a mask with all elements set to true
158/// - Applies filter with this mask
159/// - Verifies the result is identical to the original array
160///
161/// # Why This Matters
162/// This is an identity operation that should be optimized in implementations
163/// to avoid unnecessary copying.
164fn test_filter_identity(array: &dyn Array) {
165    let len = array.len();
166    if len == 0 {
167        return;
168    }
169
170    let all_true_mask = Mask::new_true(len);
171    let filtered = filter(array, &all_true_mask).vortex_unwrap();
172
173    // Filtered array should be identical to original
174    assert_eq!(
175        filtered.len(),
176        array.len(),
177        "Filtering with all-true mask should preserve array length. \
178         Original length: {}, Filtered length: {}",
179        array.len(),
180        filtered.len()
181    );
182
183    for i in 0..len {
184        let original_val = array.scalar_at(i);
185        let filtered_val = filtered.scalar_at(i);
186        assert_eq!(
187            filtered_val, original_val,
188            "Filtering with all-true mask should preserve all values. \
189             Value at index {i} changed from {original_val:?} to {filtered_val:?}"
190        );
191    }
192}
193
194/// Tests that masking with an all-false mask preserves values while making them nullable.
195///
196/// # Invariant
197/// `mask(array, all_false_mask)` should have same values as `array` but with nullable type
198///
199/// # Test Details
200/// - Creates a mask with all elements set to false (no elements are nullified)
201/// - Applies mask operation
202/// - Verifies all values are preserved but the array type becomes nullable
203///
204/// # Why This Matters
205/// Masking always produces a nullable array, even when no values are actually masked.
206/// This test ensures the type system handles this correctly.
207fn test_mask_identity(array: &dyn Array) {
208    let len = array.len();
209    if len == 0 {
210        return;
211    }
212
213    let all_false_mask = Mask::new_false(len);
214    let masked = mask(array, &all_false_mask).vortex_unwrap();
215
216    // Masked array should have same values (just nullable)
217    assert_eq!(
218        masked.len(),
219        array.len(),
220        "Masking with all-false mask should preserve array length. \
221         Original length: {}, Masked length: {}",
222        array.len(),
223        masked.len()
224    );
225
226    assert!(
227        masked.dtype().is_nullable(),
228        "Mask operation should always produce a nullable array, but dtype is {:?}",
229        masked.dtype()
230    );
231
232    for i in 0..len {
233        let original_val = array.scalar_at(i);
234        let masked_val = masked.scalar_at(i);
235        let expected_val = original_val.clone().into_nullable();
236        assert_eq!(
237            masked_val, expected_val,
238            "Masking with all-false mask should preserve values (as nullable). \
239             Value at index {i}: original = {original_val:?}, masked = {masked_val:?}, expected = {expected_val:?}"
240        );
241    }
242}
243
244/// Tests that slice and filter with contiguous mask produce same results.
245///
246/// # Invariant
247/// `filter(array, contiguous_true_mask)` should equal `slice(array, start, end)`
248///
249/// # Test Details
250/// - Creates a mask that is true only for indices 1, 2, and 3
251/// - Filters the array with this mask
252/// - Slices the array from index 1 to 4
253/// - Verifies both operations produce identical results
254///
255/// # Why This Matters
256/// When a filter mask represents a contiguous range, it should be equivalent to
257/// a slice operation. Some implementations may optimize this case.
258fn test_slice_filter_consistency(array: &dyn Array) {
259    let len = array.len();
260    if len < 4 {
261        return; // Need at least 4 elements for meaningful test
262    }
263
264    // Create a contiguous mask (true from index 1 to 3)
265    let mut mask_pattern = vec![false; len];
266    mask_pattern[1..4.min(len)].fill(true);
267
268    let mask = Mask::from_iter(mask_pattern);
269    let filtered = filter(array, &mask).vortex_unwrap();
270
271    // Slice should produce the same result
272    let sliced = array.slice(1..4.min(len));
273
274    assert_eq!(
275        filtered.len(),
276        sliced.len(),
277        "Filter with contiguous mask and slice should produce same length. \
278         \nFiltered length: {}\nSliced length: {}",
279        filtered.len(),
280        sliced.len()
281    );
282
283    for i in 0..filtered.len() {
284        let filtered_val = filtered.scalar_at(i);
285        let sliced_val = sliced.scalar_at(i);
286        assert_eq!(
287            filtered_val, sliced_val,
288            "Filter with contiguous mask and slice produced different values at index {i}. \
289             \nFiltered value: {filtered_val:?}\nSliced value: {sliced_val:?}"
290        );
291    }
292}
293
294/// Tests that take with sequential indices equals slice.
295///
296/// # Invariant
297/// `take(array, [1, 2, 3, ...])` should equal `slice(array, 1, n)`
298///
299/// # Test Details
300/// - Creates indices array with sequential values [1, 2, 3]
301/// - Takes elements at these indices
302/// - Slices array from index 1 to 4
303/// - Verifies both operations produce identical results
304///
305/// # Why This Matters
306/// Sequential takes are a common pattern that can be optimized to slice operations.
307fn test_take_slice_consistency(array: &dyn Array) {
308    let len = array.len();
309    if len < 3 {
310        return; // Need at least 3 elements
311    }
312
313    // Take indices [1, 2, 3]
314    let end = 4.min(len);
315    let indices = PrimitiveArray::from_iter((1..end).map(|i| i as u64)).into_array();
316    let taken = take(array, &indices).vortex_unwrap();
317
318    // Slice from 1 to end
319    let sliced = array.slice(1..end);
320
321    assert_eq!(
322        taken.len(),
323        sliced.len(),
324        "Take with sequential indices and slice should produce same length. \
325         \nTaken length: {}\nSliced length: {}",
326        taken.len(),
327        sliced.len()
328    );
329
330    for i in 0..taken.len() {
331        let taken_val = taken.scalar_at(i);
332        let sliced_val = sliced.scalar_at(i);
333        assert_eq!(
334            taken_val, sliced_val,
335            "Take with sequential indices and slice produced different values at index {i}. \
336             \nTaken value: {taken_val:?}\nSliced value: {sliced_val:?}"
337        );
338    }
339}
340
341/// Tests that filter preserves relative ordering
342fn test_filter_preserves_order(array: &dyn Array) {
343    let len = array.len();
344    if len < 4 {
345        return;
346    }
347
348    // Create a mask that selects elements at indices 0, 2, 3
349    let mask_pattern: Vec<bool> = (0..len).map(|i| i == 0 || i == 2 || i == 3).collect();
350    let mask = Mask::from_iter(mask_pattern);
351
352    let filtered = filter(array, &mask).vortex_unwrap();
353
354    // Verify the filtered array contains the right elements in order
355    assert_eq!(filtered.len(), 3.min(len));
356    if len >= 4 {
357        assert_eq!(filtered.scalar_at(0), array.scalar_at(0));
358        assert_eq!(filtered.scalar_at(1), array.scalar_at(2),);
359        assert_eq!(filtered.scalar_at(2), array.scalar_at(3));
360    }
361}
362
363/// Tests that take with repeated indices works correctly
364fn test_take_repeated_indices(array: &dyn Array) {
365    let len = array.len();
366    if len == 0 {
367        return;
368    }
369
370    // Take the first element three times
371    let indices = buffer![0u64, 0, 0].into_array();
372    let taken = take(array, &indices).vortex_unwrap();
373
374    assert_eq!(taken.len(), 3);
375    for i in 0..3 {
376        assert_eq!(taken.scalar_at(i), array.scalar_at(0),);
377    }
378}
379
380/// Tests mask and filter interaction with nulls
381fn test_mask_filter_null_consistency(array: &dyn Array) {
382    let len = array.len();
383    if len < 3 {
384        return;
385    }
386
387    // First mask some elements
388    let mask_pattern: Vec<bool> = (0..len).map(|i| i % 2 == 0).collect();
389    let mask_array = Mask::from_iter(mask_pattern);
390    let masked = mask(array, &mask_array).vortex_unwrap();
391
392    // Then filter to remove the nulls
393    let filter_pattern: Vec<bool> = (0..len).map(|i| i % 2 != 0).collect();
394    let filter_mask = Mask::from_iter(filter_pattern);
395    let filtered = filter(&masked, &filter_mask).vortex_unwrap();
396
397    // This should be equivalent to directly filtering the original array
398    let direct_filtered = filter(array, &filter_mask).vortex_unwrap();
399
400    assert_eq!(filtered.len(), direct_filtered.len());
401    for i in 0..filtered.len() {
402        assert_eq!(filtered.scalar_at(i), direct_filtered.scalar_at(i));
403    }
404}
405
406/// Tests that empty operations are consistent
407fn test_empty_operations_consistency(array: &dyn Array) {
408    let len = array.len();
409
410    // Empty filter
411    let empty_filter = filter(array, &Mask::new_false(len)).vortex_unwrap();
412    assert_eq!(empty_filter.len(), 0);
413    assert_eq!(empty_filter.dtype(), array.dtype());
414
415    // Empty take
416    let empty_indices = PrimitiveArray::empty::<u64>(Nullability::NonNullable).into_array();
417    let empty_take = take(array, &empty_indices).vortex_unwrap();
418    assert_eq!(empty_take.len(), 0);
419    assert_eq!(empty_take.dtype(), array.dtype());
420
421    // Empty slice (if array is non-empty)
422    if len > 0 {
423        let empty_slice = array.slice(0..0);
424        assert_eq!(empty_slice.len(), 0);
425        assert_eq!(empty_slice.dtype(), array.dtype());
426    }
427}
428
429/// Tests that take preserves array properties
430fn test_take_preserves_properties(array: &dyn Array) {
431    let len = array.len();
432    if len == 0 {
433        return;
434    }
435
436    // Take all elements in original order
437    let indices = PrimitiveArray::from_iter((0..len).map(|i| i as u64)).into_array();
438    let taken = take(array, &indices).vortex_unwrap();
439
440    // Should be identical to original
441    assert_eq!(taken.len(), array.len());
442    assert_eq!(taken.dtype(), array.dtype());
443    for i in 0..len {
444        assert_eq!(taken.scalar_at(i), array.scalar_at(i),);
445    }
446}
447
448/// Tests consistency with nullable indices.
449///
450/// # Invariant
451/// `take(array, [Some(0), None, Some(2)])` should produce `[array[0], null, array[2]]`
452///
453/// # Test Details
454/// - Creates an indices array with null at position 1: `[Some(0), None, Some(2)]`
455/// - Takes elements using these indices
456/// - Verifies that:
457///   - Position 0 contains the value from array index 0
458///   - Position 1 contains null
459///   - Position 2 contains the value from array index 2
460///   - The result array has nullable type
461///
462/// # Why This Matters
463/// Nullable indices are a powerful feature that allows introducing nulls during
464/// a take operation, which is useful for outer joins and similar operations.
465fn test_nullable_indices_consistency(array: &dyn Array) {
466    let len = array.len();
467    if len < 3 {
468        return; // Need at least 3 elements to test indices 0 and 2
469    }
470
471    // Create nullable indices where some indices are null
472    let indices = PrimitiveArray::from_option_iter([Some(0u64), None, Some(2u64)]).into_array();
473
474    let taken = take(array, &indices).vortex_unwrap();
475
476    // Result should have nulls where indices were null
477    assert_eq!(
478        taken.len(),
479        3,
480        "Take with nullable indices should produce array of length 3, got {}",
481        taken.len()
482    );
483
484    assert!(
485        taken.dtype().is_nullable(),
486        "Take with nullable indices should produce nullable array, but dtype is {:?}",
487        taken.dtype()
488    );
489
490    // Check first element (from index 0)
491    let expected_0 = array.scalar_at(0).into_nullable();
492    let actual_0 = taken.scalar_at(0);
493    assert_eq!(
494        actual_0, expected_0,
495        "Take with nullable indices: element at position 0 should be from array index 0. \
496         Expected: {expected_0:?}, Actual: {actual_0:?}"
497    );
498
499    // Check second element (should be null)
500    let actual_1 = taken.scalar_at(1);
501    assert!(
502        actual_1.is_null(),
503        "Take with nullable indices: element at position 1 should be null, but got {actual_1:?}"
504    );
505
506    // Check third element (from index 2)
507    let expected_2 = array.scalar_at(2).into_nullable();
508    let actual_2 = taken.scalar_at(2);
509    assert_eq!(
510        actual_2, expected_2,
511        "Take with nullable indices: element at position 2 should be from array index 2. \
512         Expected: {expected_2:?}, Actual: {actual_2:?}"
513    );
514}
515
516/// Tests large array consistency
517fn test_large_array_consistency(array: &dyn Array) {
518    let len = array.len();
519    if len < 1000 {
520        return;
521    }
522
523    // Test with every 10th element
524    let indices: Vec<u64> = (0..len).step_by(10).map(|i| i as u64).collect();
525    let indices_array = PrimitiveArray::from_iter(indices).into_array();
526    let taken = take(array, &indices_array).vortex_unwrap();
527
528    // Create equivalent filter mask
529    let mask_pattern: Vec<bool> = (0..len).map(|i| i % 10 == 0).collect();
530    let mask = Mask::from_iter(mask_pattern);
531    let filtered = filter(array, &mask).vortex_unwrap();
532
533    // Results should match
534    assert_eq!(taken.len(), filtered.len());
535    for i in 0..taken.len() {
536        assert_eq!(taken.scalar_at(i), filtered.scalar_at(i),);
537    }
538}
539
540/// Tests that comparison operations follow inverse relationships.
541///
542/// # Invariants
543/// - `compare(array, value, Eq)` is the inverse of `compare(array, value, NotEq)`
544/// - `compare(array, value, Gt)` is the inverse of `compare(array, value, Lte)`
545/// - `compare(array, value, Lt)` is the inverse of `compare(array, value, Gte)`
546///
547/// # Test Details
548/// - Creates comparison results for each operator
549/// - Verifies that inverse operations produce opposite boolean values
550/// - Tests with multiple scalar values to ensure consistency
551///
552/// # Why This Matters
553/// Comparison operations must maintain logical consistency across encodings.
554/// This test catches bugs where an encoding might implement one comparison
555/// correctly but fail on its logical inverse.
556fn test_comparison_inverse_consistency(array: &dyn Array) {
557    let len = array.len();
558    if len == 0 {
559        return;
560    }
561
562    // Skip non-comparable types.
563    match array.dtype() {
564        DType::Null | DType::Extension(_) | DType::Struct(..) | DType::List(..) => return,
565        _ => {}
566    }
567
568    // Get a test value from the middle of the array
569    let test_scalar = if len == 0 {
570        return;
571    } else {
572        array.scalar_at(len / 2)
573    };
574
575    // Test Eq vs NotEq
576    let const_array = ConstantArray::new(test_scalar, len);
577    if let (Ok(eq_result), Ok(neq_result)) = (
578        compare(array, const_array.as_ref(), Operator::Eq),
579        compare(array, const_array.as_ref(), Operator::NotEq),
580    ) {
581        let inverted_eq = invert(&eq_result).vortex_unwrap();
582
583        assert_eq!(
584            inverted_eq.len(),
585            neq_result.len(),
586            "Inverted Eq should have same length as NotEq"
587        );
588
589        for i in 0..inverted_eq.len() {
590            let inv_val = inverted_eq.scalar_at(i);
591            let neq_val = neq_result.scalar_at(i);
592            assert_eq!(
593                inv_val, neq_val,
594                "At index {i}: NOT(Eq) should equal NotEq. \
595                 NOT(Eq) = {inv_val:?}, NotEq = {neq_val:?}"
596            );
597        }
598    }
599
600    // Test Gt vs Lte
601    if let (Ok(gt_result), Ok(lte_result)) = (
602        compare(array, const_array.as_ref(), Operator::Gt),
603        compare(array, const_array.as_ref(), Operator::Lte),
604    ) {
605        let inverted_gt = invert(&gt_result).vortex_unwrap();
606
607        for i in 0..inverted_gt.len() {
608            let inv_val = inverted_gt.scalar_at(i);
609            let lte_val = lte_result.scalar_at(i);
610            assert_eq!(
611                inv_val, lte_val,
612                "At index {i}: NOT(Gt) should equal Lte. \
613                 NOT(Gt) = {inv_val:?}, Lte = {lte_val:?}"
614            );
615        }
616    }
617
618    // Test Lt vs Gte
619    if let (Ok(lt_result), Ok(gte_result)) = (
620        compare(array, const_array.as_ref(), Operator::Lt),
621        compare(array, const_array.as_ref(), Operator::Gte),
622    ) {
623        let inverted_lt = invert(&lt_result).vortex_unwrap();
624
625        for i in 0..inverted_lt.len() {
626            let inv_val = inverted_lt.scalar_at(i);
627            let gte_val = gte_result.scalar_at(i);
628            assert_eq!(
629                inv_val, gte_val,
630                "At index {i}: NOT(Lt) should equal Gte. \
631                 NOT(Lt) = {inv_val:?}, Gte = {gte_val:?}"
632            );
633        }
634    }
635}
636
637/// Tests that comparison operations maintain proper symmetry relationships.
638///
639/// # Invariants
640/// - `compare(array, value, Gt)` should equal `compare_scalar_array(value, array, Lt)`
641/// - `compare(array, value, Lt)` should equal `compare_scalar_array(value, array, Gt)`
642/// - `compare(array, value, Eq)` should equal `compare_scalar_array(value, array, Eq)`
643///
644/// # Test Details
645/// - Compares array-scalar operations with their symmetric scalar-array versions
646/// - Verifies that ordering relationships are properly reversed
647/// - Tests equality which should be symmetric
648///
649/// # Why This Matters
650/// Ensures that comparison operations maintain mathematical ordering properties
651/// regardless of operand order.
652fn test_comparison_symmetry_consistency(array: &dyn Array) {
653    let len = array.len();
654    if len == 0 {
655        return;
656    }
657
658    // Skip non-comparable types.
659    match array.dtype() {
660        DType::Null | DType::Extension(_) | DType::Struct(..) | DType::List(..) => return,
661        _ => {}
662    }
663
664    // Get test values
665    let test_scalar = if len == 2 {
666        return;
667    } else {
668        array.scalar_at(len / 2)
669    };
670
671    // Create a constant array with the test scalar for reverse comparison
672    let const_array = ConstantArray::new(test_scalar, len);
673
674    // Test Gt vs Lt symmetry
675    if let (Ok(arr_gt_scalar), Ok(scalar_lt_arr)) = (
676        compare(array, const_array.as_ref(), Operator::Gt),
677        compare(const_array.as_ref(), array, Operator::Lt),
678    ) {
679        assert_eq!(
680            arr_gt_scalar.len(),
681            scalar_lt_arr.len(),
682            "Symmetric comparisons should have same length"
683        );
684
685        for i in 0..arr_gt_scalar.len() {
686            let arr_gt = arr_gt_scalar.scalar_at(i);
687            let scalar_lt = scalar_lt_arr.scalar_at(i);
688            assert_eq!(
689                arr_gt, scalar_lt,
690                "At index {i}: (array > scalar) should equal (scalar < array). \
691                 array > scalar = {arr_gt:?}, scalar < array = {scalar_lt:?}"
692            );
693        }
694    }
695
696    // Test Eq symmetry
697    if let (Ok(arr_eq_scalar), Ok(scalar_eq_arr)) = (
698        compare(array, const_array.as_ref(), Operator::Eq),
699        compare(const_array.as_ref(), array, Operator::Eq),
700    ) {
701        for i in 0..arr_eq_scalar.len() {
702            let arr_eq = arr_eq_scalar.scalar_at(i);
703            let scalar_eq = scalar_eq_arr.scalar_at(i);
704            assert_eq!(
705                arr_eq, scalar_eq,
706                "At index {i}: (array == scalar) should equal (scalar == array). \
707                 array == scalar = {arr_eq:?}, scalar == array = {scalar_eq:?}"
708            );
709        }
710    }
711}
712
713/// Tests that boolean operations follow De Morgan's laws.
714///
715/// # Invariants
716/// - `NOT(A AND B)` equals `(NOT A) OR (NOT B)`
717/// - `NOT(A OR B)` equals `(NOT A) AND (NOT B)`
718///
719/// # Test Details
720/// - If the array is boolean, uses it directly for testing boolean operations
721/// - Creates two boolean masks from patterns based on the array
722/// - Computes AND/OR operations and their inversions
723/// - Verifies De Morgan's laws hold for all elements
724///
725/// # Why This Matters
726/// Boolean operations must maintain logical consistency across encodings.
727/// This test catches bugs where encodings might optimize boolean operations
728/// incorrectly, breaking fundamental logical properties.
729fn test_boolean_demorgan_consistency(array: &dyn Array) {
730    if !matches!(array.dtype(), DType::Bool(_)) {
731        return;
732    }
733
734    let mask = {
735        let mask_pattern: Vec<bool> = (0..array.len()).map(|i| i % 3 == 0).collect();
736        BoolArray::from_iter(mask_pattern)
737    };
738    let mask = mask.as_ref();
739
740    // Test first De Morgan's law: NOT(A AND B) = (NOT A) OR (NOT B)
741    if let (Ok(a_and_b), Ok(not_a), Ok(not_b)) = (and(array, mask), invert(array), invert(mask)) {
742        let not_a_and_b = invert(&a_and_b).vortex_unwrap();
743        let not_a_or_not_b = or(&not_a, &not_b).vortex_unwrap();
744
745        assert_eq!(
746            not_a_and_b.len(),
747            not_a_or_not_b.len(),
748            "De Morgan's law results should have same length"
749        );
750
751        for i in 0..not_a_and_b.len() {
752            let left = not_a_and_b.scalar_at(i);
753            let right = not_a_or_not_b.scalar_at(i);
754            assert_eq!(
755                left, right,
756                "De Morgan's first law failed at index {i}: \
757                 NOT(A AND B) = {left:?}, (NOT A) OR (NOT B) = {right:?}"
758            );
759        }
760    }
761
762    // Test second De Morgan's law: NOT(A OR B) = (NOT A) AND (NOT B)
763    if let (Ok(a_or_b), Ok(not_a), Ok(not_b)) = (or(array, mask), invert(array), invert(mask)) {
764        let not_a_or_b = invert(&a_or_b).vortex_unwrap();
765        let not_a_and_not_b = and(&not_a, &not_b).vortex_unwrap();
766
767        for i in 0..not_a_or_b.len() {
768            let left = not_a_or_b.scalar_at(i);
769            let right = not_a_and_not_b.scalar_at(i);
770            assert_eq!(
771                left, right,
772                "De Morgan's second law failed at index {i}: \
773                 NOT(A OR B) = {left:?}, (NOT A) AND (NOT B) = {right:?}"
774            );
775        }
776    }
777}
778
779/// Tests that slice and aggregate operations produce consistent results.
780///
781/// # Invariants
782/// - Aggregating a sliced array should equal aggregating the corresponding
783///   elements from the canonical form
784/// - This applies to sum, count, min/max, and other aggregate functions
785///
786/// # Test Details
787/// - Slices the array and computes aggregates
788/// - Compares against aggregating the canonical form's slice
789/// - Tests multiple aggregate functions where applicable
790///
791/// # Why This Matters
792/// Aggregate operations on sliced arrays must produce correct results
793/// regardless of the underlying encoding's offset handling.
794fn test_slice_aggregate_consistency(array: &dyn Array) {
795    use vortex_dtype::DType;
796
797    use crate::compute::{min_max, nan_count, sum};
798
799    let len = array.len();
800    if len < 5 {
801        return; // Need enough elements for meaningful slice
802    }
803
804    // Define slice bounds
805    let start = 1;
806    let end = (len - 1).min(start + 10); // Take up to 10 elements
807
808    // Get sliced array and canonical slice
809    let sliced = array.slice(start..end);
810    let canonical = array.to_canonical();
811    let canonical_sliced = canonical.as_ref().slice(start..end);
812
813    // Test null count through invalid_count
814    assert_eq!(
815        sliced.invalid_count(),
816        canonical_sliced.invalid_count(),
817        "null_count on sliced array should match canonical. \
818             Sliced: {}, Canonical: {}",
819        sliced.invalid_count(),
820        canonical_sliced.invalid_count()
821    );
822
823    // Test sum for numeric types
824    if !matches!(array.dtype(), DType::Primitive(..)) {
825        return;
826    }
827
828    if let (Ok(slice_sum), Ok(canonical_sum)) = (sum(&sliced), sum(&canonical_sliced)) {
829        // Compare sum scalars
830        assert_eq!(
831            slice_sum, canonical_sum,
832            "sum on sliced array should match canonical. \
833                 Sliced: {slice_sum:?}, Canonical: {canonical_sum:?}"
834        );
835    }
836
837    // Test min_max
838    if let (Ok(slice_minmax), Ok(canonical_minmax)) = (min_max(&sliced), min_max(&canonical_sliced))
839    {
840        match (slice_minmax, canonical_minmax) {
841            (Some(s_result), Some(c_result)) => {
842                assert_eq!(
843                    s_result.min, c_result.min,
844                    "min on sliced array should match canonical. \
845                         Sliced: {:?}, Canonical: {:?}",
846                    s_result.min, c_result.min
847                );
848                assert_eq!(
849                    s_result.max, c_result.max,
850                    "max on sliced array should match canonical. \
851                         Sliced: {:?}, Canonical: {:?}",
852                    s_result.max, c_result.max
853                );
854            }
855            (None, None) => {} // Both empty, OK
856            _ => vortex_panic!("min_max results don't match"),
857        }
858    }
859
860    // Test nan_count for floating point types
861    if array.dtype().is_float()
862        && let (Ok(slice_nan_count), Ok(canonical_nan_count)) =
863            (nan_count(&sliced), nan_count(&canonical_sliced))
864    {
865        assert_eq!(
866            slice_nan_count, canonical_nan_count,
867            "nan_count on sliced array should match canonical. \
868                 Sliced: {slice_nan_count}, Canonical: {canonical_nan_count}"
869        );
870    }
871}
872
873/// Tests that cast operations preserve array properties when sliced.
874///
875/// # Invariant
876/// `cast(slice(array, start, end), dtype)` should equal `slice(cast(array, dtype), start, end)`
877///
878/// # Test Details
879/// - Slices the array from index 2 to 7 (or len-2 if smaller)
880/// - Casts the sliced array to a different type
881/// - Compares against the canonical form of the array (without slicing or casting the canonical form)
882/// - Verifies both approaches produce identical results
883///
884/// # Why This Matters
885/// This test specifically catches bugs where encodings (like RunEndArray) fail to preserve
886/// offset information during cast operations. Such bugs can lead to incorrect data being
887/// returned after casting a sliced array.
888fn test_cast_slice_consistency(array: &dyn Array) {
889    let len = array.len();
890    if len < 5 {
891        return; // Need at least 5 elements for meaningful slice
892    }
893
894    // Define slice bounds
895    let start = 2;
896    let end = 7.min(len - 2).max(start + 1); // Ensure we have at least 1 element
897
898    // Get canonical form of the original array
899    let canonical = array.to_canonical();
900
901    // Choose appropriate target dtype based on the array's type
902    let target_dtypes = match array.dtype() {
903        DType::Null => vec![],
904        DType::Bool(nullability) => vec![
905            DType::Primitive(PType::U8, *nullability),
906            DType::Primitive(PType::I32, *nullability),
907        ],
908        DType::Primitive(ptype, nullability) => {
909            let mut targets = vec![];
910            // Test nullability changes
911            let opposite_nullability = match nullability {
912                Nullability::NonNullable => Nullability::Nullable,
913                Nullability::Nullable => Nullability::NonNullable,
914            };
915            targets.push(DType::Primitive(*ptype, opposite_nullability));
916
917            // Test widening casts
918            match ptype {
919                PType::U8 => {
920                    targets.push(DType::Primitive(PType::U16, *nullability));
921                    targets.push(DType::Primitive(PType::I16, *nullability));
922                }
923                PType::U16 => {
924                    targets.push(DType::Primitive(PType::U32, *nullability));
925                    targets.push(DType::Primitive(PType::I32, *nullability));
926                }
927                PType::U32 => {
928                    targets.push(DType::Primitive(PType::U64, *nullability));
929                    targets.push(DType::Primitive(PType::I64, *nullability));
930                }
931                PType::U64 => {
932                    targets.push(DType::Primitive(PType::F64, *nullability));
933                }
934                PType::I8 => {
935                    targets.push(DType::Primitive(PType::I16, *nullability));
936                    targets.push(DType::Primitive(PType::F32, *nullability));
937                }
938                PType::I16 => {
939                    targets.push(DType::Primitive(PType::I32, *nullability));
940                    targets.push(DType::Primitive(PType::F32, *nullability));
941                }
942                PType::I32 => {
943                    targets.push(DType::Primitive(PType::I64, *nullability));
944                    targets.push(DType::Primitive(PType::F64, *nullability));
945                }
946                PType::I64 => {
947                    targets.push(DType::Primitive(PType::F64, *nullability));
948                }
949                PType::F16 => {
950                    targets.push(DType::Primitive(PType::F32, *nullability));
951                }
952                PType::F32 => {
953                    targets.push(DType::Primitive(PType::F64, *nullability));
954                    targets.push(DType::Primitive(PType::I32, *nullability));
955                }
956                PType::F64 => {
957                    targets.push(DType::Primitive(PType::I64, *nullability));
958                }
959            }
960            targets
961        }
962        DType::Utf8(nullability) => {
963            let opposite = match nullability {
964                Nullability::NonNullable => Nullability::Nullable,
965                Nullability::Nullable => Nullability::NonNullable,
966            };
967            vec![DType::Utf8(opposite), DType::Binary(*nullability)]
968        }
969        DType::Binary(nullability) => {
970            let opposite = match nullability {
971                Nullability::NonNullable => Nullability::Nullable,
972                Nullability::Nullable => Nullability::NonNullable,
973            };
974            vec![
975                DType::Binary(opposite),
976                DType::Utf8(*nullability), // May fail if not valid UTF-8
977            ]
978        }
979        DType::Decimal(decimal_type, nullability) => {
980            let opposite = match nullability {
981                Nullability::NonNullable => Nullability::Nullable,
982                Nullability::Nullable => Nullability::NonNullable,
983            };
984            vec![DType::Decimal(*decimal_type, opposite)]
985        }
986        DType::Struct(fields, nullability) => {
987            let opposite = match nullability {
988                Nullability::NonNullable => Nullability::Nullable,
989                Nullability::Nullable => Nullability::NonNullable,
990            };
991            vec![DType::Struct(fields.clone(), opposite)]
992        }
993        DType::List(element_type, nullability) => {
994            let opposite = match nullability {
995                Nullability::NonNullable => Nullability::Nullable,
996                Nullability::Nullable => Nullability::NonNullable,
997            };
998            vec![DType::List(element_type.clone(), opposite)]
999        }
1000        DType::FixedSizeList(element_type, list_size, nullability) => {
1001            let opposite = match nullability {
1002                Nullability::NonNullable => Nullability::Nullable,
1003                Nullability::Nullable => Nullability::NonNullable,
1004            };
1005            vec![DType::FixedSizeList(
1006                element_type.clone(),
1007                *list_size,
1008                opposite,
1009            )]
1010        }
1011        DType::Extension(_) => vec![], // Extension types typically only cast to themselves
1012    };
1013
1014    // Test each target dtype
1015    for target_dtype in target_dtypes {
1016        // Slice the array
1017        let sliced = array.slice(start..end);
1018
1019        // Try to cast the sliced array
1020        let slice_then_cast = match cast(&sliced, &target_dtype) {
1021            Ok(result) => result,
1022            Err(_) => continue, // Skip if cast fails
1023        };
1024
1025        // Verify against canonical form
1026        assert_eq!(
1027            slice_then_cast.len(),
1028            end - start,
1029            "Sliced and casted array should have length {}, but has {}",
1030            end - start,
1031            slice_then_cast.len()
1032        );
1033
1034        // Compare each value against the canonical form
1035        for i in 0..slice_then_cast.len() {
1036            let slice_cast_val = slice_then_cast.scalar_at(i);
1037
1038            // Get the corresponding value from the canonical array (adjusted for slice offset)
1039            let canonical_val = canonical.as_ref().scalar_at(start + i);
1040
1041            // Cast the canonical scalar to the target dtype
1042            let expected_val = match canonical_val.cast(&target_dtype) {
1043                Ok(val) => val,
1044                Err(_) => {
1045                    // If scalar cast fails, we can't compare - skip this target dtype
1046                    // This can happen for some type conversions that aren't supported at scalar level
1047                    break;
1048                }
1049            };
1050
1051            assert_eq!(
1052                slice_cast_val,
1053                expected_val,
1054                "Cast of sliced array produced incorrect value at index {i}. \
1055                 Got: {slice_cast_val:?}, Expected: {expected_val:?} \
1056                 (canonical value at index {}: {canonical_val:?})\n\
1057                 This likely indicates the array encoding doesn't preserve offset information during cast.",
1058                start + i
1059            );
1060        }
1061
1062        // Also test the other way: cast then slice
1063        let casted = match cast(array, &target_dtype) {
1064            Ok(result) => result,
1065            Err(_) => continue, // Skip if cast fails
1066        };
1067        let cast_then_slice = casted.slice(start..end);
1068
1069        // Verify the two approaches produce identical results
1070        assert_eq!(
1071            slice_then_cast.len(),
1072            cast_then_slice.len(),
1073            "Slice-then-cast and cast-then-slice should produce arrays of the same length"
1074        );
1075
1076        for i in 0..slice_then_cast.len() {
1077            let slice_cast_val = slice_then_cast.scalar_at(i);
1078            let cast_slice_val = cast_then_slice.scalar_at(i);
1079            assert_eq!(
1080                slice_cast_val, cast_slice_val,
1081                "Slice-then-cast and cast-then-slice produced different values at index {i}. \
1082                 Slice-then-cast: {slice_cast_val:?}, Cast-then-slice: {cast_slice_val:?}"
1083            );
1084        }
1085    }
1086}
1087
1088/// Run all consistency tests on an array.
1089///
1090/// This function executes a comprehensive suite of consistency tests that verify
1091/// the correctness of compute operations on Vortex arrays.
1092///
1093/// # Test Suite Overview
1094///
1095/// ## Core Operation Consistency
1096/// - **Filter/Take**: Verifies `filter(array, mask)` equals `take(array, true_indices)`
1097/// - **Mask Composition**: Ensures sequential masks equal combined masks
1098/// - **Slice/Filter**: Checks contiguous filters equal slice operations
1099/// - **Take/Slice**: Validates sequential takes equal slice operations
1100/// - **Cast/Slice**: Ensures cast operations preserve sliced array properties
1101///
1102/// ## Boolean Operations
1103/// - **De Morgan's Laws**: Verifies boolean operations follow logical laws
1104///
1105/// ## Comparison Operations
1106/// - **Inverse Relationships**: Verifies logical inverses (Eq/NotEq, Gt/Lte, Lt/Gte)
1107/// - **Symmetry**: Ensures proper ordering relationships when operands are swapped
1108///
1109/// ## Aggregate Operations
1110/// - **Slice/Aggregate**: Verifies aggregates on sliced arrays match canonical
1111///
1112/// ## Identity Operations
1113/// - **Filter Identity**: All-true mask preserves the array
1114/// - **Mask Identity**: All-false mask preserves values (as nullable)
1115/// - **Take Identity**: Taking all indices preserves the array
1116///
1117/// ## Edge Cases
1118/// - **Empty Operations**: Empty filters, takes, and slices behave correctly
1119/// - **Single Element**: Operations work with single-element arrays
1120/// - **Repeated Indices**: Take with duplicate indices works correctly
1121///
1122/// ## Null Handling
1123/// - **Nullable Indices**: Null indices produce null values
1124/// - **Mask/Filter Interaction**: Masking then filtering behaves predictably
1125///
1126/// ## Large Arrays
1127/// - **Performance**: Operations scale correctly to large arrays (1000+ elements)
1128/// ```text
1129pub fn test_array_consistency(array: &dyn Array) {
1130    // Core operation consistency
1131    test_filter_take_consistency(array);
1132    test_double_mask_consistency(array);
1133    test_slice_filter_consistency(array);
1134    test_take_slice_consistency(array);
1135    test_cast_slice_consistency(array);
1136
1137    // Boolean operations
1138    test_boolean_demorgan_consistency(array);
1139
1140    // Comparison operations
1141    test_comparison_inverse_consistency(array);
1142    test_comparison_symmetry_consistency(array);
1143
1144    // Aggregate operations
1145    test_slice_aggregate_consistency(array);
1146
1147    // Identity operations
1148    test_filter_identity(array);
1149    test_mask_identity(array);
1150    test_take_preserves_properties(array);
1151
1152    // Ordering and correctness
1153    test_filter_preserves_order(array);
1154    test_take_repeated_indices(array);
1155
1156    // Null handling
1157    test_mask_filter_null_consistency(array);
1158    test_nullable_indices_consistency(array);
1159
1160    // Edge cases
1161    test_empty_operations_consistency(array);
1162    test_large_array_consistency(array);
1163}