vortex_array/
hash.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::hash::Hash;
6use std::hash::Hasher;
7use std::sync::Arc;
8
9use vortex_buffer::BitBuffer;
10use vortex_buffer::Buffer;
11use vortex_mask::Mask;
12
13use crate::Array;
14use crate::ArrayRef;
15use crate::buffer::BufferHandle;
16use crate::patches::Patches;
17use crate::validity::Validity;
18
19/// The precision level for structural equality and hashing of arrays.
20///
21/// This configuration option defines how precise the hash/equals results are given the set of
22/// data buffers backing the array.
23#[derive(Clone, Copy, Debug)]
24pub enum Precision {
25    /// Data buffers are compared by their pointer and length only. This is the fastest option, but
26    /// may lead to false negatives if two arrays contain identical data but are backed by
27    /// different buffers.
28    Ptr,
29    /// Data buffers are compared by their full content. This is the slowest option, but guarantees
30    /// that two arrays with identical data will be considered equal.
31    Value,
32}
33
34/// A hash trait for arrays that represents structural equality with a configurable level of
35/// precision. This trait is used primarily to implement common subtree elimination and other
36/// array-based caching mechanisms.
37///
38/// The precision of the hash defines what level of structural equality is represented. See
39/// [`Precision`] for more details.
40///
41/// Note that where [`Precision::Ptr`] is used, the hash is only valid for the lifetime of the
42/// object.
43pub trait ArrayHash {
44    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision);
45}
46
47/// A dynamic version of [`ArrayHash`].
48pub trait DynArrayHash: private::SealedHash {
49    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: Precision);
50}
51
52impl<T: ArrayHash + ?Sized> DynArrayHash for T {
53    fn dyn_array_hash(&self, mut state: &mut dyn Hasher, precision: Precision) {
54        ArrayHash::array_hash(self, &mut state, precision);
55    }
56}
57
58/// An equality trait for arrays that represents structural equality with a configurable level of
59/// precision. This trait is used primarily to implement common subtree elimination and other
60/// array-based caching mechanisms.
61///
62/// The precision of the equality check defines what level of structural equality is represented. See
63/// [`Precision`] for more details.
64pub trait ArrayEq {
65    fn array_eq(&self, other: &Self, precision: Precision) -> bool;
66}
67
68/// A dynamic version of [`ArrayEq`].
69pub trait DynArrayEq: private::SealedEq {
70    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool;
71}
72
73impl<T: ArrayEq + 'static> DynArrayEq for T {
74    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool {
75        other
76            .downcast_ref::<Self>()
77            .is_some_and(|other| ArrayEq::array_eq(self, other, precision))
78    }
79}
80
81mod private {
82    use crate::ArrayEq;
83    use crate::ArrayHash;
84
85    pub trait SealedHash {}
86    impl<T: ArrayHash + ?Sized> SealedHash for T {}
87    pub trait SealedEq {}
88    impl<T: ArrayEq + ?Sized> SealedEq for T {}
89}
90
91impl ArrayHash for dyn Array + '_ {
92    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
93        self.dyn_array_hash(state, precision);
94    }
95}
96
97impl ArrayEq for dyn Array + '_ {
98    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
99        self.dyn_array_eq(other.as_any(), precision)
100    }
101}
102
103impl ArrayHash for ArrayRef {
104    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
105        self.as_ref().array_hash(state, precision);
106    }
107}
108
109impl ArrayEq for ArrayRef {
110    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
111        self.as_ref().array_eq(other.as_ref(), precision)
112    }
113}
114
115impl<T: Hash> ArrayHash for Buffer<T> {
116    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
117        match precision {
118            Precision::Ptr => {
119                self.as_ptr().hash(state);
120                self.len().hash(state);
121            }
122            Precision::Value => {
123                self.as_ref().hash(state);
124            }
125        }
126    }
127}
128impl<T: PartialEq> ArrayEq for Buffer<T> {
129    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
130        match precision {
131            Precision::Ptr => self.as_ptr() == other.as_ptr() && self.len() == other.len(),
132            Precision::Value => self.as_ref() == other.as_ref(),
133        }
134    }
135}
136
137impl ArrayHash for BitBuffer {
138    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
139        match precision {
140            Precision::Ptr => {
141                self.inner().as_ptr().hash(state);
142                self.offset().hash(state);
143                self.len().hash(state);
144            }
145            Precision::Value => {
146                // NOTE(ngates): this is really rather expensive...
147                for chunk in self.chunks().iter_padded() {
148                    chunk.hash(state);
149                }
150            }
151        }
152    }
153}
154impl ArrayEq for BitBuffer {
155    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
156        match precision {
157            Precision::Ptr => {
158                self.inner().as_ptr() == other.inner().as_ptr()
159                    && self.offset() == other.offset()
160                    && self.len() == other.len()
161            }
162            Precision::Value => self.eq(other),
163        }
164    }
165}
166
167impl<T: ArrayHash> ArrayHash for Option<T> {
168    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
169        match self {
170            Some(value) => {
171                true.hash(state);
172                value.array_hash(state, precision);
173            }
174            None => {
175                false.hash(state);
176            }
177        }
178    }
179}
180
181impl<T: ArrayEq> ArrayEq for Option<T> {
182    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
183        match (self, other) {
184            (Some(v1), Some(v2)) => v1.array_eq(v2, precision),
185            (None, None) => true,
186            _ => false,
187        }
188    }
189}
190
191impl ArrayHash for Mask {
192    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
193        std::mem::discriminant(self).hash(state);
194        match self {
195            Mask::AllTrue(len) => {
196                len.hash(state);
197            }
198            Mask::AllFalse(len) => {
199                len.hash(state);
200            }
201            Mask::Values(values) => {
202                values.bit_buffer().array_hash(state, precision);
203            }
204        }
205    }
206}
207impl ArrayEq for Mask {
208    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
209        match (self, other) {
210            (Mask::AllTrue(len1), Mask::AllTrue(len2)) => len1 == len2,
211            (Mask::AllFalse(len1), Mask::AllFalse(len2)) => len1 == len2,
212            (Mask::Values(buf1), Mask::Values(buf2)) => {
213                buf1.bit_buffer().array_eq(buf2.bit_buffer(), precision)
214            }
215            _ => false,
216        }
217    }
218}
219
220impl ArrayHash for Validity {
221    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
222        std::mem::discriminant(self).hash(state);
223        if let Validity::Array(array) = self {
224            array.array_hash(state, precision);
225        }
226    }
227}
228
229impl ArrayEq for Validity {
230    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
231        match (self, other) {
232            (Validity::AllValid, Validity::AllValid) => true,
233            (Validity::AllInvalid, Validity::AllInvalid) => true,
234            (Validity::NonNullable, Validity::NonNullable) => true,
235            (Validity::Array(arr1), Validity::Array(arr2)) => arr1.array_eq(arr2, precision),
236            _ => false,
237        }
238    }
239}
240
241impl ArrayHash for Patches {
242    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
243        self.array_len().hash(state);
244        self.offset().hash(state);
245        self.indices().array_hash(state, precision);
246        self.values().array_hash(state, precision);
247    }
248}
249
250impl ArrayEq for Patches {
251    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
252        self.array_len() == other.array_len()
253            && self.offset() == other.offset()
254            && self.indices().array_eq(other.indices(), precision)
255            && self.values().array_eq(other.values(), precision)
256    }
257}
258
259impl ArrayHash for BufferHandle {
260    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
261        match self {
262            BufferHandle::Host(b) => b.array_hash(state, precision),
263            BufferHandle::Device(_) => (),
264        }
265    }
266}
267
268impl ArrayEq for BufferHandle {
269    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
270        match (self, other) {
271            (BufferHandle::Host(b), BufferHandle::Host(b2)) => b.array_eq(b2, precision),
272            (BufferHandle::Device(b), BufferHandle::Device(b2)) => match precision {
273                Precision::Ptr => Arc::ptr_eq(b, b2),
274                Precision::Value => b.eq(b2),
275            },
276            _ => false,
277        }
278    }
279}