vortex_array/
compress.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
use vortex_error::VortexResult;

use crate::aliases::hash_set::HashSet;
use crate::encoding::EncodingRef;
use crate::stats::{ArrayStatistics as _, PRUNING_STATS};
use crate::ArrayData;

/// A pluggable strategy for compressing an [`ArrayData`].
///
/// Implementors provide the compression routine itself and report the set of encodings
/// associated with the strategy.
pub trait CompressionStrategy {
    /// Compresses `array`, returning the resulting [`ArrayData`] on success.
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementation produces; the conditions are
    /// implementation-defined.
    fn compress(&self, array: &ArrayData) -> VortexResult<ArrayData>;

    /// Returns the set of encodings used by this strategy.
    // NOTE(review): presumably the encodings `compress` may emit -- confirm against implementors.
    fn used_encodings(&self) -> HashSet<EncodingRef>;
}

/// Debug-only sanity check that compression preserved the logical validity length.
///
/// Compares the length of the logical validity of `arr` with that of `compressed`. The check is
/// compiled only when `debug_assertions` is enabled; in other builds this function does nothing.
///
/// # Panics
///
/// With `debug_assertions` enabled, panics if the two lengths differ. The message reports both
/// lengths along with the tree display of each array.
pub fn check_validity_unchanged(arr: &ArrayData, compressed: &ArrayData) {
    // Reference both parameters so they still count as used when the block below is compiled out.
    let _ = (arr, compressed);

    #[cfg(debug_assertions)]
    {
        // Single place that knows how to obtain the logical validity length of an array.
        let validity_len = |array: &ArrayData| array.with_dyn(|a| a.logical_validity().len());
        let old_validity = validity_len(arr);
        let new_validity = validity_len(compressed);

        debug_assert!(
            old_validity == new_validity,
            "validity length changed after compression: {old_validity} -> {new_validity}\n From tree {} To tree {}\n",
            arr.tree_display(),
            compressed.tree_display()
        );
    }
}

/// Debug-only sanity check that compression preserved the array's dtype.
///
/// Compares `arr.dtype()` with `compressed.dtype()`. The check is compiled only when
/// `debug_assertions` is enabled; in other builds this function does nothing.
///
/// # Panics
///
/// With `debug_assertions` enabled, panics if the dtypes differ. The message reports both dtypes
/// along with the tree display of each array.
pub fn check_dtype_unchanged(arr: &ArrayData, compressed: &ArrayData) {
    // Reference both parameters so they still count as used when the block below is compiled out.
    let _ = (arr, compressed);

    #[cfg(debug_assertions)]
    {
        use crate::ArrayDType;

        // Look each dtype up once and reuse it for both the comparison and the message.
        let (before, after) = (arr.dtype(), compressed.dtype());
        debug_assert!(
            before == after,
            "Compression changed dtype: {} -> {}\nFrom array: {}Into array {}",
            before,
            after,
            arr.tree_display(),
            compressed.tree_display(),
        );
    }
}

/// Debug-only sanity check that compression preserved the array's statistics.
///
/// For every `(stat, value)` pair in `arr.statistics().to_set()`, asserts that
/// `compressed.statistics().get(stat)` yields the same value. The check is compiled only when
/// `debug_assertions` is enabled; in other builds this function does nothing.
///
/// # Panics
///
/// With `debug_assertions` enabled, panics on the first statistic whose value on `compressed`
/// is missing or different. A missing value is rendered as `null` in the message.
pub fn check_statistics_unchanged(arr: &ArrayData, compressed: &ArrayData) {
    // Reference both parameters so they still count as used when the block below is compiled out.
    let _ = (arr, compressed);

    #[cfg(debug_assertions)]
    {
        let original_stats = arr.statistics().to_set();
        for (stat, value) in original_stats.into_iter() {
            // Look the compressed-side value up once; it feeds both the comparison and the message.
            let actual = compressed.statistics().get(stat);
            debug_assert_eq!(
                actual,
                Some(value.clone()),
                "Compression changed {stat} from {value} to {}",
                actual
                    .as_ref()
                    .map_or_else(|| "null".to_string(), |s| s.to_string())
            );
        }
    }
}

/// Computes every statistic listed in [`PRUNING_STATS`] for `arr`.
///
/// The computed values themselves are discarded; only success or failure is reported.
///
/// # Errors
///
/// Propagates any error returned by `compute_all`.
pub fn compute_pruning_stats(arr: &ArrayData) -> VortexResult<()> {
    arr.statistics().compute_all(PRUNING_STATS)?;
    Ok(())
}