Skip to main content

vortex_array/
hash.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::hash::Hash;
6use std::hash::Hasher;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_mask::Mask;
11
12use crate::Array;
13use crate::ArrayRef;
14use crate::patches::Patches;
15use crate::validity::Validity;
16
17/// The precision level for structural equality and hashing of arrays.
18///
19/// This configuration option defines how precise the hash/equals results are given the set of
20/// data buffers backing the array.
21#[derive(Clone, Copy, Debug)]
22pub enum Precision {
23    /// Data buffers are compared by their pointer and length only. This is the fastest option, but
24    /// may lead to false negatives if two arrays contain identical data but are backed by
25    /// different buffers.
26    Ptr,
27    /// Data buffers are compared by their full content. This is the slowest option, but guarantees
28    /// that two arrays with identical data will be considered equal.
29    Value,
30}
31
32/// A hash trait for arrays that represents structural equality with a configurable level of
33/// precision. This trait is used primarily to implement common subtree elimination and other
34/// array-based caching mechanisms.
35///
36/// The precision of the hash defines what level of structural equality is represented. See
37/// [`Precision`] for more details.
38///
39/// Note that where [`Precision::Ptr`] is used, the hash is only valid for the lifetime of the
40/// object.
41pub trait ArrayHash {
42    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision);
43}
44
45/// A dynamic version of [`ArrayHash`].
46pub trait DynArrayHash: private::SealedHash {
47    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: Precision);
48}
49
50impl<T: ArrayHash + ?Sized> DynArrayHash for T {
51    fn dyn_array_hash(&self, mut state: &mut dyn Hasher, precision: Precision) {
52        ArrayHash::array_hash(self, &mut state, precision);
53    }
54}
55
56/// An equality trait for arrays that represents structural equality with a configurable level of
57/// precision. This trait is used primarily to implement common subtree elimination and other
58/// array-based caching mechanisms.
59///
60/// The precision of the equality check defines what level of structural equality is represented. See
61/// [`Precision`] for more details.
62pub trait ArrayEq {
63    fn array_eq(&self, other: &Self, precision: Precision) -> bool;
64}
65
66/// A dynamic version of [`ArrayEq`].
67pub trait DynArrayEq: private::SealedEq {
68    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool;
69}
70
71impl<T: ArrayEq + 'static> DynArrayEq for T {
72    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool {
73        other
74            .downcast_ref::<Self>()
75            .is_some_and(|other| ArrayEq::array_eq(self, other, precision))
76    }
77}
78
79mod private {
80    use crate::ArrayEq;
81    use crate::ArrayHash;
82
83    pub trait SealedHash {}
84    impl<T: ArrayHash + ?Sized> SealedHash for T {}
85    pub trait SealedEq {}
86    impl<T: ArrayEq + ?Sized> SealedEq for T {}
87}
88
89impl ArrayHash for dyn Array + '_ {
90    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
91        self.dyn_array_hash(state, precision);
92    }
93}
94
95impl ArrayEq for dyn Array + '_ {
96    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
97        self.dyn_array_eq(Array::as_any(other), precision)
98    }
99}
100
101impl ArrayHash for ArrayRef {
102    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
103        self.as_ref().array_hash(state, precision);
104    }
105}
106
107impl ArrayEq for ArrayRef {
108    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
109        self.as_ref().array_eq(other.as_ref(), precision)
110    }
111}
112
113impl<T: Hash> ArrayHash for Buffer<T> {
114    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
115        match precision {
116            Precision::Ptr => {
117                self.as_ptr().hash(state);
118                self.len().hash(state);
119            }
120            Precision::Value => {
121                self.as_ref().hash(state);
122            }
123        }
124    }
125}
126impl<T: PartialEq> ArrayEq for Buffer<T> {
127    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
128        match precision {
129            Precision::Ptr => self.as_ptr() == other.as_ptr() && self.len() == other.len(),
130            Precision::Value => self.as_ref() == other.as_ref(),
131        }
132    }
133}
134
135impl ArrayHash for BitBuffer {
136    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
137        match precision {
138            Precision::Ptr => {
139                self.inner().as_ptr().hash(state);
140                self.offset().hash(state);
141                self.len().hash(state);
142            }
143            Precision::Value => {
144                // NOTE(ngates): this is really rather expensive...
145                for chunk in self.chunks().iter_padded() {
146                    chunk.hash(state);
147                }
148            }
149        }
150    }
151}
152impl ArrayEq for BitBuffer {
153    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
154        match precision {
155            Precision::Ptr => {
156                self.inner().as_ptr() == other.inner().as_ptr()
157                    && self.offset() == other.offset()
158                    && self.len() == other.len()
159            }
160            Precision::Value => self.eq(other),
161        }
162    }
163}
164
165impl<T: ArrayHash> ArrayHash for Option<T> {
166    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
167        match self {
168            Some(value) => {
169                true.hash(state);
170                value.array_hash(state, precision);
171            }
172            None => {
173                false.hash(state);
174            }
175        }
176    }
177}
178
179impl<T: ArrayEq> ArrayEq for Option<T> {
180    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
181        match (self, other) {
182            (Some(v1), Some(v2)) => v1.array_eq(v2, precision),
183            (None, None) => true,
184            _ => false,
185        }
186    }
187}
188
189impl ArrayHash for Mask {
190    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
191        std::mem::discriminant(self).hash(state);
192        match self {
193            Mask::AllTrue(len) => {
194                len.hash(state);
195            }
196            Mask::AllFalse(len) => {
197                len.hash(state);
198            }
199            Mask::Values(values) => {
200                values.bit_buffer().array_hash(state, precision);
201            }
202        }
203    }
204}
205impl ArrayEq for Mask {
206    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
207        match (self, other) {
208            (Mask::AllTrue(len1), Mask::AllTrue(len2)) => len1 == len2,
209            (Mask::AllFalse(len1), Mask::AllFalse(len2)) => len1 == len2,
210            (Mask::Values(buf1), Mask::Values(buf2)) => {
211                buf1.bit_buffer().array_eq(buf2.bit_buffer(), precision)
212            }
213            _ => false,
214        }
215    }
216}
217
218impl ArrayHash for Validity {
219    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
220        std::mem::discriminant(self).hash(state);
221        if let Validity::Array(array) = self {
222            array.array_hash(state, precision);
223        }
224    }
225}
226
227impl ArrayEq for Validity {
228    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
229        match (self, other) {
230            (Validity::AllValid, Validity::AllValid) => true,
231            (Validity::AllInvalid, Validity::AllInvalid) => true,
232            (Validity::NonNullable, Validity::NonNullable) => true,
233            (Validity::Array(arr1), Validity::Array(arr2)) => arr1.array_eq(arr2, precision),
234            _ => false,
235        }
236    }
237}
238
239impl ArrayHash for Patches {
240    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
241        self.array_len().hash(state);
242        self.offset().hash(state);
243        self.indices().array_hash(state, precision);
244        self.values().array_hash(state, precision);
245    }
246}
247
248impl ArrayEq for Patches {
249    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
250        self.array_len() == other.array_len()
251            && self.offset() == other.offset()
252            && self.indices().array_eq(other.indices(), precision)
253            && self.values().array_eq(other.values(), precision)
254    }
255}