Skip to main content

vortex_array/
hash.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::hash::Hash;
6use std::hash::Hasher;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_mask::Mask;
11
12use crate::patches::Patches;
13use crate::validity::Validity;
14
15/// The precision level for structural equality and hashing of arrays.
16///
17/// This configuration option defines how precise the hash/equals results are given the set of
18/// data buffers backing the array.
19#[derive(Clone, Copy, Debug)]
20pub enum Precision {
21    /// Data buffers are compared by their pointer and length only. This is the fastest option, but
22    /// may lead to false negatives if two arrays contain identical data but are backed by
23    /// different buffers.
24    Ptr,
25    /// Data buffers are compared by their full content. This is the slowest option, but guarantees
26    /// that two arrays with identical data will be considered equal.
27    Value,
28}
29
30/// A hash trait for arrays that represents structural equality with a configurable level of
31/// precision. This trait is used primarily to implement common subtree elimination and other
32/// array-based caching mechanisms.
33///
34/// The precision of the hash defines what level of structural equality is represented. See
35/// [`Precision`] for more details.
36///
37/// Note that where [`Precision::Ptr`] is used, the hash is only valid for the lifetime of the
38/// object.
39pub trait ArrayHash {
40    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision);
41}
42
43/// A dynamic version of [`ArrayHash`].
44pub trait DynArrayHash: private::SealedHash {
45    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: Precision);
46}
47
48impl<T: ArrayHash + ?Sized> DynArrayHash for T {
49    fn dyn_array_hash(&self, mut state: &mut dyn Hasher, precision: Precision) {
50        ArrayHash::array_hash(self, &mut state, precision);
51    }
52}
53
54/// An equality trait for arrays that represents structural equality with a configurable level of
55/// precision. This trait is used primarily to implement common subtree elimination and other
56/// array-based caching mechanisms.
57///
58/// The precision of the equality check defines what level of structural equality is represented. See
59/// [`Precision`] for more details.
60pub trait ArrayEq {
61    fn array_eq(&self, other: &Self, precision: Precision) -> bool;
62}
63
64/// A dynamic version of [`ArrayEq`].
65pub trait DynArrayEq: private::SealedEq {
66    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool;
67}
68
69impl<T: ArrayEq + 'static> DynArrayEq for T {
70    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool {
71        other
72            .downcast_ref::<Self>()
73            .is_some_and(|other| ArrayEq::array_eq(self, other, precision))
74    }
75}
76
77mod private {
78    use crate::ArrayEq;
79    use crate::ArrayHash;
80
81    pub trait SealedHash {}
82    impl<T: ArrayHash + ?Sized> SealedHash for T {}
83    pub trait SealedEq {}
84    impl<T: ArrayEq + ?Sized> SealedEq for T {}
85}
86
87impl<T: Hash> ArrayHash for Buffer<T> {
88    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
89        match precision {
90            Precision::Ptr => {
91                self.as_ptr().hash(state);
92                self.len().hash(state);
93            }
94            Precision::Value => {
95                self.as_ref().hash(state);
96            }
97        }
98    }
99}
100impl<T: PartialEq> ArrayEq for Buffer<T> {
101    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
102        match precision {
103            Precision::Ptr => self.as_ptr() == other.as_ptr() && self.len() == other.len(),
104            Precision::Value => self.as_ref() == other.as_ref(),
105        }
106    }
107}
108
109impl ArrayHash for BitBuffer {
110    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
111        match precision {
112            Precision::Ptr => {
113                self.inner().as_ptr().hash(state);
114                self.offset().hash(state);
115                self.len().hash(state);
116            }
117            Precision::Value => {
118                // NOTE(ngates): this is really rather expensive...
119                for chunk in self.chunks().iter_padded() {
120                    chunk.hash(state);
121                }
122            }
123        }
124    }
125}
126impl ArrayEq for BitBuffer {
127    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
128        match precision {
129            Precision::Ptr => {
130                self.inner().as_ptr() == other.inner().as_ptr()
131                    && self.offset() == other.offset()
132                    && self.len() == other.len()
133            }
134            Precision::Value => self.eq(other),
135        }
136    }
137}
138
139impl<T: ArrayHash> ArrayHash for Option<T> {
140    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
141        match self {
142            Some(value) => {
143                true.hash(state);
144                value.array_hash(state, precision);
145            }
146            None => {
147                false.hash(state);
148            }
149        }
150    }
151}
152
153impl<T: ArrayEq> ArrayEq for Option<T> {
154    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
155        match (self, other) {
156            (Some(v1), Some(v2)) => v1.array_eq(v2, precision),
157            (None, None) => true,
158            _ => false,
159        }
160    }
161}
162
163impl ArrayHash for Mask {
164    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
165        std::mem::discriminant(self).hash(state);
166        match self {
167            Mask::AllTrue(len) => {
168                len.hash(state);
169            }
170            Mask::AllFalse(len) => {
171                len.hash(state);
172            }
173            Mask::Values(values) => {
174                values.bit_buffer().array_hash(state, precision);
175            }
176        }
177    }
178}
179impl ArrayEq for Mask {
180    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
181        match (self, other) {
182            (Mask::AllTrue(len1), Mask::AllTrue(len2)) => len1 == len2,
183            (Mask::AllFalse(len1), Mask::AllFalse(len2)) => len1 == len2,
184            (Mask::Values(buf1), Mask::Values(buf2)) => {
185                buf1.bit_buffer().array_eq(buf2.bit_buffer(), precision)
186            }
187            _ => false,
188        }
189    }
190}
191
192impl ArrayHash for Validity {
193    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
194        std::mem::discriminant(self).hash(state);
195        if let Validity::Array(array) = self {
196            array.array_hash(state, precision);
197        }
198    }
199}
200
201impl ArrayEq for Validity {
202    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
203        match (self, other) {
204            (Validity::AllValid, Validity::AllValid) => true,
205            (Validity::AllInvalid, Validity::AllInvalid) => true,
206            (Validity::NonNullable, Validity::NonNullable) => true,
207            (Validity::Array(arr1), Validity::Array(arr2)) => arr1.array_eq(arr2, precision),
208            _ => false,
209        }
210    }
211}
212
213impl ArrayHash for Patches {
214    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
215        self.array_len().hash(state);
216        self.offset().hash(state);
217        self.indices().array_hash(state, precision);
218        self.values().array_hash(state, precision);
219    }
220}
221
222impl ArrayEq for Patches {
223    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
224        self.array_len() == other.array_len()
225            && self.offset() == other.offset()
226            && self.indices().array_eq(other.indices(), precision)
227            && self.values().array_eq(other.values(), precision)
228    }
229}