1use vortex_error::VortexResult;
3
4use crate::aliases::hash_set::HashSet;
5use crate::stats::PRUNING_STATS;
6use crate::{Array, ArrayRef, EncodingId};
7
8pub trait CompressionStrategy {
10 fn compress(&self, array: &dyn Array) -> VortexResult<ArrayRef>;
12
13 fn used_encodings(&self) -> HashSet<EncodingId>;
15}
16
17pub fn check_validity_unchanged(arr: &dyn Array, compressed: &dyn Array) {
19 let _ = arr;
20 let _ = compressed;
21 #[cfg(debug_assertions)]
22 {
23 use vortex_error::VortexExpect;
24
25 let old_validity = arr
26 .validity_mask()
27 .vortex_expect("failed to compute validity")
28 .len();
29 let new_validity = compressed
30 .validity_mask()
31 .vortex_expect("failed to compute validity ")
32 .len();
33
34 debug_assert!(
35 old_validity == new_validity,
36 "validity length changed after compression: {old_validity} -> {new_validity}\n From tree {} To tree {}\n",
37 arr.tree_display(),
38 compressed.tree_display()
39 );
40 }
41}
42
43pub fn check_dtype_unchanged(arr: &dyn Array, compressed: &dyn Array) {
45 let _ = arr;
46 let _ = compressed;
47 #[cfg(debug_assertions)]
48 {
49 debug_assert!(
50 arr.dtype() == compressed.dtype(),
51 "Compression changed dtype: {} -> {}\nFrom array: {}Into array {}",
52 arr.dtype(),
53 compressed.dtype(),
54 arr.tree_display(),
55 compressed.tree_display(),
56 );
57 }
58}
59
60pub fn check_statistics_unchanged(arr: &dyn Array, compressed: &dyn Array) {
62 let _ = arr;
63 let _ = compressed;
64 #[cfg(debug_assertions)]
65 {
66 use crate::stats::StatsProviderExt;
67
68 for (stat, value) in arr.statistics().to_owned().into_iter() {
70 if let Some(dtype) = stat.dtype(compressed.dtype()) {
71 let compressed_scalar = compressed.statistics().get_scalar(stat, &dtype);
72 debug_assert_eq!(
73 compressed_scalar,
74 Some(value.clone().into_scalar(dtype)),
75 "Compression changed {stat} from {value} to {:?}",
76 compressed_scalar.as_ref(),
77 );
78 }
79 }
80 }
81}
82
83pub fn compute_precompression_stats(arr: &dyn Array) -> VortexResult<()> {
86 arr.statistics().compute_uncompressed_size_in_bytes();
87 arr.statistics().compute_all(PRUNING_STATS).map(|_| ())
88}