vortex_array/
hash.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::hash::Hash;
6use std::hash::Hasher;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_buffer::BufferHandle;
11use vortex_mask::Mask;
12
13use crate::Array;
14use crate::ArrayRef;
15use crate::patches::Patches;
16use crate::validity::Validity;
17
18/// The precision level for structural equality and hashing of arrays.
19///
20/// This configuration option defines how precise the hash/equals results are given the set of
21/// data buffers backing the array.
22#[derive(Clone, Copy, Debug)]
23pub enum Precision {
24    /// Data buffers are compared by their pointer and length only. This is the fastest option, but
25    /// may lead to false negatives if two arrays contain identical data but are backed by
26    /// different buffers.
27    Ptr,
28    /// Data buffers are compared by their full content. This is the slowest option, but guarantees
29    /// that two arrays with identical data will be considered equal.
30    Value,
31}
32
33/// A hash trait for arrays that represents structural equality with a configurable level of
34/// precision. This trait is used primarily to implement common subtree elimination and other
35/// array-based caching mechanisms.
36///
37/// The precision of the hash defines what level of structural equality is represented. See
38/// [`Precision`] for more details.
39///
40/// Note that where [`Precision::Ptr`] is used, the hash is only valid for the lifetime of the
41/// object.
42pub trait ArrayHash {
43    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision);
44}
45
46/// A dynamic version of [`ArrayHash`].
47pub trait DynArrayHash: private::SealedHash {
48    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: Precision);
49}
50
51impl<T: ArrayHash + ?Sized> DynArrayHash for T {
52    fn dyn_array_hash(&self, mut state: &mut dyn Hasher, precision: Precision) {
53        ArrayHash::array_hash(self, &mut state, precision);
54    }
55}
56
57/// An equality trait for arrays that represents structural equality with a configurable level of
58/// precision. This trait is used primarily to implement common subtree elimination and other
59/// array-based caching mechanisms.
60///
61/// The precision of the equality check defines what level of structural equality is represented. See
62/// [`Precision`] for more details.
63pub trait ArrayEq {
64    fn array_eq(&self, other: &Self, precision: Precision) -> bool;
65}
66
67/// A dynamic version of [`ArrayEq`].
68pub trait DynArrayEq: private::SealedEq {
69    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool;
70}
71
72impl<T: ArrayEq + 'static> DynArrayEq for T {
73    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool {
74        other
75            .downcast_ref::<Self>()
76            .is_some_and(|other| ArrayEq::array_eq(self, other, precision))
77    }
78}
79
80mod private {
81    use crate::ArrayEq;
82    use crate::ArrayHash;
83
84    pub trait SealedHash {}
85    impl<T: ArrayHash + ?Sized> SealedHash for T {}
86    pub trait SealedEq {}
87    impl<T: ArrayEq + ?Sized> SealedEq for T {}
88}
89
90impl ArrayHash for dyn Array + '_ {
91    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
92        self.dyn_array_hash(state, precision);
93    }
94}
95
96impl ArrayEq for dyn Array + '_ {
97    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
98        self.dyn_array_eq(other.as_any(), precision)
99    }
100}
101
102impl ArrayHash for ArrayRef {
103    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
104        self.as_ref().array_hash(state, precision);
105    }
106}
107
108impl ArrayEq for ArrayRef {
109    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
110        self.as_ref().array_eq(other.as_ref(), precision)
111    }
112}
113
114impl<T: Hash> ArrayHash for Buffer<T> {
115    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
116        match precision {
117            Precision::Ptr => {
118                self.as_ptr().hash(state);
119                self.len().hash(state);
120            }
121            Precision::Value => {
122                self.as_ref().hash(state);
123            }
124        }
125    }
126}
127impl<T: PartialEq> ArrayEq for Buffer<T> {
128    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
129        match precision {
130            Precision::Ptr => self.as_ptr() == other.as_ptr() && self.len() == other.len(),
131            Precision::Value => self.as_ref() == other.as_ref(),
132        }
133    }
134}
135
136impl ArrayHash for BitBuffer {
137    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
138        match precision {
139            Precision::Ptr => {
140                self.inner().as_ptr().hash(state);
141                self.offset().hash(state);
142                self.len().hash(state);
143            }
144            Precision::Value => {
145                // NOTE(ngates): this is really rather expensive...
146                for chunk in self.chunks().iter_padded() {
147                    chunk.hash(state);
148                }
149            }
150        }
151    }
152}
153impl ArrayEq for BitBuffer {
154    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
155        match precision {
156            Precision::Ptr => {
157                self.inner().as_ptr() == other.inner().as_ptr()
158                    && self.offset() == other.offset()
159                    && self.len() == other.len()
160            }
161            Precision::Value => self.eq(other),
162        }
163    }
164}
165
166impl<T: ArrayHash> ArrayHash for Option<T> {
167    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
168        match self {
169            Some(value) => {
170                true.hash(state);
171                value.array_hash(state, precision);
172            }
173            None => {
174                false.hash(state);
175            }
176        }
177    }
178}
179
180impl<T: ArrayEq> ArrayEq for Option<T> {
181    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
182        match (self, other) {
183            (Some(v1), Some(v2)) => v1.array_eq(v2, precision),
184            (None, None) => true,
185            _ => false,
186        }
187    }
188}
189
190impl ArrayHash for Mask {
191    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
192        std::mem::discriminant(self).hash(state);
193        match self {
194            Mask::AllTrue(len) => {
195                len.hash(state);
196            }
197            Mask::AllFalse(len) => {
198                len.hash(state);
199            }
200            Mask::Values(values) => {
201                values.bit_buffer().array_hash(state, precision);
202            }
203        }
204    }
205}
206impl ArrayEq for Mask {
207    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
208        match (self, other) {
209            (Mask::AllTrue(len1), Mask::AllTrue(len2)) => len1 == len2,
210            (Mask::AllFalse(len1), Mask::AllFalse(len2)) => len1 == len2,
211            (Mask::Values(buf1), Mask::Values(buf2)) => {
212                buf1.bit_buffer().array_eq(buf2.bit_buffer(), precision)
213            }
214            _ => false,
215        }
216    }
217}
218
219impl ArrayHash for Validity {
220    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
221        std::mem::discriminant(self).hash(state);
222        if let Validity::Array(array) = self {
223            array.array_hash(state, precision);
224        }
225    }
226}
227
228impl ArrayEq for Validity {
229    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
230        match (self, other) {
231            (Validity::AllValid, Validity::AllValid) => true,
232            (Validity::AllInvalid, Validity::AllInvalid) => true,
233            (Validity::NonNullable, Validity::NonNullable) => true,
234            (Validity::Array(arr1), Validity::Array(arr2)) => arr1.array_eq(arr2, precision),
235            _ => false,
236        }
237    }
238}
239
240impl ArrayHash for Patches {
241    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
242        self.array_len().hash(state);
243        self.offset().hash(state);
244        self.indices().array_hash(state, precision);
245        self.values().array_hash(state, precision);
246    }
247}
248
249impl ArrayEq for Patches {
250    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
251        self.array_len() == other.array_len()
252            && self.offset() == other.offset()
253            && self.indices().array_eq(other.indices(), precision)
254            && self.values().array_eq(other.values(), precision)
255    }
256}
257
258impl ArrayHash for BufferHandle {
259    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
260        match self {
261            BufferHandle::Buffer(b) => b.array_hash(state, precision),
262            BufferHandle::DeviceBuffer => (),
263        }
264    }
265}
266
267impl ArrayEq for BufferHandle {
268    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
269        match (self, other) {
270            (BufferHandle::Buffer(b), BufferHandle::Buffer(b2)) => b.array_eq(b2, precision),
271            (BufferHandle::DeviceBuffer, BufferHandle::DeviceBuffer) => true,
272            _ => false,
273        }
274    }
275}