vortex_array/
hash.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::hash::{Hash, Hasher};
6
7use vortex_buffer::{BitBuffer, Buffer};
8use vortex_mask::Mask;
9
10use crate::patches::Patches;
11use crate::validity::Validity;
12use crate::{Array, ArrayRef};
13
14/// The precision level for structural equality and hashing of arrays.
15///
16/// This configuration option defines how precise the hash/equals results are given the set of
17/// data buffers backing the array.
18#[derive(Clone, Copy, Debug)]
19pub enum Precision {
20    /// Data buffers are compared by their pointer and length only. This is the fastest option, but
21    /// may lead to false negatives if two arrays contain identical data but are backed by
22    /// different buffers.
23    Ptr,
24    /// Data buffers are compared by their full content. This is the slowest option, but guarantees
25    /// that two arrays with identical data will be considered equal.
26    Value,
27}
28
29/// A hash trait for arrays that represents structural equality with a configurable level of
30/// precision. This trait is used primarily to implement common subtree elimination and other
31/// array-based caching mechanisms.
32///
33/// The precision of the hash defines what level of structural equality is represented. See
34/// [`Precision`] for more details.
35///
36/// Note that where [`Precision::Ptr`] is used, the hash is only valid for the lifetime of the
37/// object.
38pub trait ArrayHash {
39    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision);
40}
41
42/// A dynamic version of [`ArrayHash`].
43pub trait DynArrayHash: private::SealedHash {
44    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: Precision);
45}
46
47impl<T: ArrayHash + ?Sized> DynArrayHash for T {
48    fn dyn_array_hash(&self, mut state: &mut dyn Hasher, precision: Precision) {
49        ArrayHash::array_hash(self, &mut state, precision);
50    }
51}
52
53/// An equality trait for arrays that represents structural equality with a configurable level of
54/// precision. This trait is used primarily to implement common subtree elimination and other
55/// array-based caching mechanisms.
56///
57/// The precision of the equality check defines what level of structural equality is represented. See
58/// [`Precision`] for more details.
59pub trait ArrayEq {
60    fn array_eq(&self, other: &Self, precision: Precision) -> bool;
61}
62
63/// A dynamic version of [`ArrayEq`].
64pub trait DynArrayEq: private::SealedEq {
65    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool;
66}
67
68impl<T: ArrayEq + 'static> DynArrayEq for T {
69    fn dyn_array_eq(&self, other: &dyn Any, precision: Precision) -> bool {
70        other
71            .downcast_ref::<Self>()
72            .is_some_and(|other| ArrayEq::array_eq(self, other, precision))
73    }
74}
75
76mod private {
77    use crate::{ArrayEq, ArrayHash};
78
79    pub trait SealedHash {}
80    impl<T: ArrayHash + ?Sized> SealedHash for T {}
81    pub trait SealedEq {}
82    impl<T: ArrayEq + ?Sized> SealedEq for T {}
83}
84
85impl ArrayHash for dyn Array + '_ {
86    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
87        self.dyn_array_hash(state, precision);
88    }
89}
90
91impl ArrayEq for dyn Array + '_ {
92    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
93        self.dyn_array_eq(other.as_any(), precision)
94    }
95}
96
97impl ArrayHash for ArrayRef {
98    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
99        self.as_ref().array_hash(state, precision);
100    }
101}
102
103impl ArrayEq for ArrayRef {
104    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
105        self.as_ref().array_eq(other.as_ref(), precision)
106    }
107}
108
109impl<T: Hash> ArrayHash for Buffer<T> {
110    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
111        match precision {
112            Precision::Ptr => {
113                self.as_ptr().hash(state);
114                self.len().hash(state);
115            }
116            Precision::Value => {
117                self.as_ref().hash(state);
118            }
119        }
120    }
121}
122impl<T: PartialEq> ArrayEq for Buffer<T> {
123    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
124        match precision {
125            Precision::Ptr => self.as_ptr() == other.as_ptr() && self.len() == other.len(),
126            Precision::Value => self.as_ref() == other.as_ref(),
127        }
128    }
129}
130
131impl ArrayHash for BitBuffer {
132    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
133        match precision {
134            Precision::Ptr => {
135                self.inner().as_ptr().hash(state);
136                self.offset().hash(state);
137                self.len().hash(state);
138            }
139            Precision::Value => {
140                // NOTE(ngates): this is really rather expensive...
141                for chunk in self.chunks().iter_padded() {
142                    chunk.hash(state);
143                }
144            }
145        }
146    }
147}
148impl ArrayEq for BitBuffer {
149    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
150        match precision {
151            Precision::Ptr => {
152                self.inner().as_ptr() == other.inner().as_ptr()
153                    && self.offset() == other.offset()
154                    && self.len() == other.len()
155            }
156            Precision::Value => self.eq(other),
157        }
158    }
159}
160
161impl<T: ArrayHash> ArrayHash for Option<T> {
162    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
163        match self {
164            Some(value) => {
165                true.hash(state);
166                value.array_hash(state, precision);
167            }
168            None => {
169                false.hash(state);
170            }
171        }
172    }
173}
174
175impl<T: ArrayEq> ArrayEq for Option<T> {
176    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
177        match (self, other) {
178            (Some(v1), Some(v2)) => v1.array_eq(v2, precision),
179            (None, None) => true,
180            _ => false,
181        }
182    }
183}
184
185impl ArrayHash for Mask {
186    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
187        std::mem::discriminant(self).hash(state);
188        match self {
189            Mask::AllTrue(len) => {
190                len.hash(state);
191            }
192            Mask::AllFalse(len) => {
193                len.hash(state);
194            }
195            Mask::Values(values) => {
196                values.bit_buffer().array_hash(state, precision);
197            }
198        }
199    }
200}
201impl ArrayEq for Mask {
202    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
203        match (self, other) {
204            (Mask::AllTrue(len1), Mask::AllTrue(len2)) => len1 == len2,
205            (Mask::AllFalse(len1), Mask::AllFalse(len2)) => len1 == len2,
206            (Mask::Values(buf1), Mask::Values(buf2)) => {
207                buf1.bit_buffer().array_eq(buf2.bit_buffer(), precision)
208            }
209            _ => false,
210        }
211    }
212}
213
214impl ArrayHash for Validity {
215    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
216        std::mem::discriminant(self).hash(state);
217        if let Validity::Array(array) = self {
218            array.array_hash(state, precision);
219        }
220    }
221}
222
223impl ArrayEq for Validity {
224    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
225        match (self, other) {
226            (Validity::AllValid, Validity::AllValid) => true,
227            (Validity::AllInvalid, Validity::AllInvalid) => true,
228            (Validity::NonNullable, Validity::NonNullable) => true,
229            (Validity::Array(arr1), Validity::Array(arr2)) => arr1.array_eq(arr2, precision),
230            _ => false,
231        }
232    }
233}
234
235impl ArrayHash for Patches {
236    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
237        self.array_len().hash(state);
238        self.offset().hash(state);
239        self.indices().array_hash(state, precision);
240        self.values().array_hash(state, precision);
241    }
242}
243
244impl ArrayEq for Patches {
245    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
246        self.array_len() == other.array_len()
247            && self.offset() == other.offset()
248            && self.indices().array_eq(other.indices(), precision)
249            && self.values().array_eq(other.values(), precision)
250    }
251}