1#[cfg(not(feature = "force_hash_collisions"))]
21use std::sync::Arc;
22
23use ahash::RandomState;
24use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
25use arrow::array::*;
26use arrow::datatypes::*;
27#[cfg(not(feature = "force_hash_collisions"))]
28use arrow::{downcast_dictionary_array, downcast_primitive_array};
29
30#[cfg(not(feature = "force_hash_collisions"))]
31use crate::cast::{
32 as_binary_view_array, as_boolean_array, as_fixed_size_list_array,
33 as_generic_binary_array, as_large_list_array, as_list_array, as_map_array,
34 as_string_array, as_string_view_array, as_struct_array,
35};
36use crate::error::Result;
37#[cfg(not(feature = "force_hash_collisions"))]
38use crate::error::_internal_err;
39
40#[inline]
42pub fn combine_hashes(l: u64, r: u64) -> u64 {
43 let hash = (17 * 37u64).wrapping_add(l);
44 hash.wrapping_mul(37).wrapping_add(r)
45}
46
47#[cfg(not(feature = "force_hash_collisions"))]
48fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col: bool) {
49 if mul_col {
50 hashes_buffer.iter_mut().for_each(|hash| {
51 *hash = combine_hashes(random_state.hash_one(1), *hash);
53 })
54 } else {
55 hashes_buffer.iter_mut().for_each(|hash| {
56 *hash = random_state.hash_one(1);
57 })
58 }
59}
60
61pub trait HashValue {
62 fn hash_one(&self, state: &RandomState) -> u64;
63}
64
65impl<T: HashValue + ?Sized> HashValue for &T {
66 fn hash_one(&self, state: &RandomState) -> u64 {
67 T::hash_one(self, state)
68 }
69}
70
71macro_rules! hash_value {
72 ($($t:ty),+) => {
73 $(impl HashValue for $t {
74 fn hash_one(&self, state: &RandomState) -> u64 {
75 state.hash_one(self)
76 }
77 })+
78 };
79}
80hash_value!(i8, i16, i32, i64, i128, i256, u8, u16, u32, u64);
81hash_value!(bool, str, [u8], IntervalDayTime, IntervalMonthDayNano);
82
83macro_rules! hash_float_value {
84 ($(($t:ty, $i:ty)),+) => {
85 $(impl HashValue for $t {
86 fn hash_one(&self, state: &RandomState) -> u64 {
87 state.hash_one(<$i>::from_ne_bytes(self.to_ne_bytes()))
88 }
89 })+
90 };
91}
92hash_float_value!((half::f16, u16), (f32, u32), (f64, u64));
93
94#[cfg(not(feature = "force_hash_collisions"))]
98fn hash_array_primitive<T>(
99 array: &PrimitiveArray<T>,
100 random_state: &RandomState,
101 hashes_buffer: &mut [u64],
102 rehash: bool,
103) where
104 T: ArrowPrimitiveType<Native: HashValue>,
105{
106 assert_eq!(
107 hashes_buffer.len(),
108 array.len(),
109 "hashes_buffer and array should be of equal length"
110 );
111
112 if array.null_count() == 0 {
113 if rehash {
114 for (hash, &value) in hashes_buffer.iter_mut().zip(array.values().iter()) {
115 *hash = combine_hashes(value.hash_one(random_state), *hash);
116 }
117 } else {
118 for (hash, &value) in hashes_buffer.iter_mut().zip(array.values().iter()) {
119 *hash = value.hash_one(random_state);
120 }
121 }
122 } else if rehash {
123 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
124 if !array.is_null(i) {
125 let value = unsafe { array.value_unchecked(i) };
126 *hash = combine_hashes(value.hash_one(random_state), *hash);
127 }
128 }
129 } else {
130 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
131 if !array.is_null(i) {
132 let value = unsafe { array.value_unchecked(i) };
133 *hash = value.hash_one(random_state);
134 }
135 }
136 }
137}
138
139#[cfg(not(feature = "force_hash_collisions"))]
143fn hash_array<T>(
144 array: T,
145 random_state: &RandomState,
146 hashes_buffer: &mut [u64],
147 rehash: bool,
148) where
149 T: ArrayAccessor,
150 T::Item: HashValue,
151{
152 assert_eq!(
153 hashes_buffer.len(),
154 array.len(),
155 "hashes_buffer and array should be of equal length"
156 );
157
158 if array.null_count() == 0 {
159 if rehash {
160 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
161 let value = unsafe { array.value_unchecked(i) };
162 *hash = combine_hashes(value.hash_one(random_state), *hash);
163 }
164 } else {
165 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
166 let value = unsafe { array.value_unchecked(i) };
167 *hash = value.hash_one(random_state);
168 }
169 }
170 } else if rehash {
171 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
172 if !array.is_null(i) {
173 let value = unsafe { array.value_unchecked(i) };
174 *hash = combine_hashes(value.hash_one(random_state), *hash);
175 }
176 }
177 } else {
178 for (i, hash) in hashes_buffer.iter_mut().enumerate() {
179 if !array.is_null(i) {
180 let value = unsafe { array.value_unchecked(i) };
181 *hash = value.hash_one(random_state);
182 }
183 }
184 }
185}
186
187#[cfg(not(feature = "force_hash_collisions"))]
189#[inline]
190fn update_hash_for_dict_key(
191 hash: &mut u64,
192 dict_hashes: &[u64],
193 dict_values: &dyn Array,
194 idx: usize,
195 multi_col: bool,
196) {
197 if dict_values.is_valid(idx) {
198 if multi_col {
199 *hash = combine_hashes(dict_hashes[idx], *hash);
200 } else {
201 *hash = dict_hashes[idx];
202 }
203 }
204 }
206
207#[cfg(not(feature = "force_hash_collisions"))]
209fn hash_dictionary<K: ArrowDictionaryKeyType>(
210 array: &DictionaryArray<K>,
211 random_state: &RandomState,
212 hashes_buffer: &mut [u64],
213 multi_col: bool,
214) -> Result<()> {
215 let dict_values = Arc::clone(array.values());
219 let mut dict_hashes = vec![0; dict_values.len()];
220 create_hashes(&[dict_values], random_state, &mut dict_hashes)?;
221
222 let dict_values = array.values();
224 for (hash, key) in hashes_buffer.iter_mut().zip(array.keys().iter()) {
225 if let Some(key) = key {
226 let idx = key.as_usize();
227 update_hash_for_dict_key(
228 hash,
229 &dict_hashes,
230 dict_values.as_ref(),
231 idx,
232 multi_col,
233 );
234 } }
236 Ok(())
237}
238
239#[cfg(not(feature = "force_hash_collisions"))]
240fn hash_struct_array(
241 array: &StructArray,
242 random_state: &RandomState,
243 hashes_buffer: &mut [u64],
244) -> Result<()> {
245 let nulls = array.nulls();
246 let row_len = array.len();
247
248 let valid_row_indices: Vec<usize> = if let Some(nulls) = nulls {
249 nulls.valid_indices().collect()
250 } else {
251 (0..row_len).collect()
252 };
253
254 let mut values_hashes = vec![0u64; row_len];
256 create_hashes(array.columns(), random_state, &mut values_hashes)?;
257
258 for i in valid_row_indices {
259 let hash = &mut hashes_buffer[i];
260 *hash = combine_hashes(*hash, values_hashes[i]);
261 }
262
263 Ok(())
264}
265
266#[cfg(not(feature = "force_hash_collisions"))]
268fn hash_map_array(
269 array: &MapArray,
270 random_state: &RandomState,
271 hashes_buffer: &mut [u64],
272) -> Result<()> {
273 let nulls = array.nulls();
274 let offsets = array.offsets();
275
276 let mut values_hashes = vec![0u64; array.entries().len()];
278 create_hashes(array.entries().columns(), random_state, &mut values_hashes)?;
279
280 if let Some(nulls) = nulls {
282 for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() {
283 if nulls.is_valid(i) {
284 let hash = &mut hashes_buffer[i];
285 for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] {
286 *hash = combine_hashes(*hash, *values_hash);
287 }
288 }
289 }
290 } else {
291 for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() {
292 let hash = &mut hashes_buffer[i];
293 for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] {
294 *hash = combine_hashes(*hash, *values_hash);
295 }
296 }
297 }
298
299 Ok(())
300}
301
302#[cfg(not(feature = "force_hash_collisions"))]
303fn hash_list_array<OffsetSize>(
304 array: &GenericListArray<OffsetSize>,
305 random_state: &RandomState,
306 hashes_buffer: &mut [u64],
307) -> Result<()>
308where
309 OffsetSize: OffsetSizeTrait,
310{
311 let values = Arc::clone(array.values());
312 let offsets = array.value_offsets();
313 let nulls = array.nulls();
314 let mut values_hashes = vec![0u64; values.len()];
315 create_hashes(&[values], random_state, &mut values_hashes)?;
316 if let Some(nulls) = nulls {
317 for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() {
318 if nulls.is_valid(i) {
319 let hash = &mut hashes_buffer[i];
320 for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] {
321 *hash = combine_hashes(*hash, *values_hash);
322 }
323 }
324 }
325 } else {
326 for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() {
327 let hash = &mut hashes_buffer[i];
328 for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] {
329 *hash = combine_hashes(*hash, *values_hash);
330 }
331 }
332 }
333 Ok(())
334}
335
336#[cfg(not(feature = "force_hash_collisions"))]
337fn hash_fixed_list_array(
338 array: &FixedSizeListArray,
339 random_state: &RandomState,
340 hashes_buffer: &mut [u64],
341) -> Result<()> {
342 let values = Arc::clone(array.values());
343 let value_length = array.value_length() as usize;
344 let nulls = array.nulls();
345 let mut values_hashes = vec![0u64; values.len()];
346 create_hashes(&[values], random_state, &mut values_hashes)?;
347 if let Some(nulls) = nulls {
348 for i in 0..array.len() {
349 if nulls.is_valid(i) {
350 let hash = &mut hashes_buffer[i];
351 for values_hash in
352 &values_hashes[i * value_length..(i + 1) * value_length]
353 {
354 *hash = combine_hashes(*hash, *values_hash);
355 }
356 }
357 }
358 } else {
359 for i in 0..array.len() {
360 let hash = &mut hashes_buffer[i];
361 for values_hash in &values_hashes[i * value_length..(i + 1) * value_length] {
362 *hash = combine_hashes(*hash, *values_hash);
363 }
364 }
365 }
366 Ok(())
367}
368
369#[cfg(feature = "force_hash_collisions")]
374pub fn create_hashes<'a>(
375 _arrays: &[ArrayRef],
376 _random_state: &RandomState,
377 hashes_buffer: &'a mut Vec<u64>,
378) -> Result<&'a mut Vec<u64>> {
379 for hash in hashes_buffer.iter_mut() {
380 *hash = 0
381 }
382 Ok(hashes_buffer)
383}
384
385#[cfg(not(feature = "force_hash_collisions"))]
391pub fn create_hashes<'a>(
392 arrays: &[ArrayRef],
393 random_state: &RandomState,
394 hashes_buffer: &'a mut Vec<u64>,
395) -> Result<&'a mut Vec<u64>> {
396 for (i, col) in arrays.iter().enumerate() {
397 let array = col.as_ref();
398 let rehash = i >= 1;
400 downcast_primitive_array! {
401 array => hash_array_primitive(array, random_state, hashes_buffer, rehash),
402 DataType::Null => hash_null(random_state, hashes_buffer, rehash),
403 DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, rehash),
404 DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, rehash),
405 DataType::Utf8View => hash_array(as_string_view_array(array)?, random_state, hashes_buffer, rehash),
406 DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash),
407 DataType::Binary => hash_array(as_generic_binary_array::<i32>(array)?, random_state, hashes_buffer, rehash),
408 DataType::BinaryView => hash_array(as_binary_view_array(array)?, random_state, hashes_buffer, rehash),
409 DataType::LargeBinary => hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, rehash),
410 DataType::FixedSizeBinary(_) => {
411 let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap();
412 hash_array(array, random_state, hashes_buffer, rehash)
413 }
414 DataType::Dictionary(_, _) => downcast_dictionary_array! {
415 array => hash_dictionary(array, random_state, hashes_buffer, rehash)?,
416 _ => unreachable!()
417 }
418 DataType::Struct(_) => {
419 let array = as_struct_array(array)?;
420 hash_struct_array(array, random_state, hashes_buffer)?;
421 }
422 DataType::List(_) => {
423 let array = as_list_array(array)?;
424 hash_list_array(array, random_state, hashes_buffer)?;
425 }
426 DataType::LargeList(_) => {
427 let array = as_large_list_array(array)?;
428 hash_list_array(array, random_state, hashes_buffer)?;
429 }
430 DataType::Map(_, _) => {
431 let array = as_map_array(array)?;
432 hash_map_array(array, random_state, hashes_buffer)?;
433 }
434 DataType::FixedSizeList(_,_) => {
435 let array = as_fixed_size_list_array(array)?;
436 hash_fixed_list_array(array, random_state, hashes_buffer)?;
437 }
438 _ => {
439 return _internal_err!(
441 "Unsupported data type in hasher: {}",
442 col.data_type()
443 );
444 }
445 }
446 }
447 Ok(hashes_buffer)
448}
449
450#[cfg(test)]
451mod tests {
452 use std::sync::Arc;
453
454 use arrow::array::*;
455 #[cfg(not(feature = "force_hash_collisions"))]
456 use arrow::datatypes::*;
457
458 use super::*;
459
460 #[test]
461 fn create_hashes_for_decimal_array() -> Result<()> {
462 let array = vec![1, 2, 3, 4]
463 .into_iter()
464 .map(Some)
465 .collect::<Decimal128Array>()
466 .with_precision_and_scale(20, 3)
467 .unwrap();
468 let array_ref = Arc::new(array);
469 let random_state = RandomState::with_seeds(0, 0, 0, 0);
470 let hashes_buff = &mut vec![0; array_ref.len()];
471 let hashes = create_hashes(&[array_ref], &random_state, hashes_buff)?;
472 assert_eq!(hashes.len(), 4);
473 Ok(())
474 }
475
476 #[test]
477 fn create_hashes_for_empty_fixed_size_lit() -> Result<()> {
478 let empty_array = FixedSizeListBuilder::new(StringBuilder::new(), 1).finish();
479 let random_state = RandomState::with_seeds(0, 0, 0, 0);
480 let hashes_buff = &mut vec![0; 0];
481 let hashes = create_hashes(&[Arc::new(empty_array)], &random_state, hashes_buff)?;
482 assert_eq!(hashes, &Vec::<u64>::new());
483 Ok(())
484 }
485
486 #[test]
487 fn create_hashes_for_float_arrays() -> Result<()> {
488 let f32_arr = Arc::new(Float32Array::from(vec![0.12, 0.5, 1f32, 444.7]));
489 let f64_arr = Arc::new(Float64Array::from(vec![0.12, 0.5, 1f64, 444.7]));
490
491 let random_state = RandomState::with_seeds(0, 0, 0, 0);
492 let hashes_buff = &mut vec![0; f32_arr.len()];
493 let hashes = create_hashes(&[f32_arr], &random_state, hashes_buff)?;
494 assert_eq!(hashes.len(), 4,);
495
496 let hashes = create_hashes(&[f64_arr], &random_state, hashes_buff)?;
497 assert_eq!(hashes.len(), 4,);
498
499 Ok(())
500 }
501
502 macro_rules! create_hash_binary {
503 ($NAME:ident, $ARRAY:ty) => {
504 #[cfg(not(feature = "force_hash_collisions"))]
505 #[test]
506 fn $NAME() {
507 let binary = [
508 Some(b"short".to_byte_slice()),
509 None,
510 Some(b"long but different 12 bytes string"),
511 Some(b"short2"),
512 Some(b"Longer than 12 bytes string"),
513 Some(b"short"),
514 Some(b"Longer than 12 bytes string"),
515 ];
516
517 let binary_array = Arc::new(binary.iter().cloned().collect::<$ARRAY>());
518 let ref_array = Arc::new(binary.iter().cloned().collect::<BinaryArray>());
519
520 let random_state = RandomState::with_seeds(0, 0, 0, 0);
521
522 let mut binary_hashes = vec![0; binary.len()];
523 create_hashes(&[binary_array], &random_state, &mut binary_hashes)
524 .unwrap();
525
526 let mut ref_hashes = vec![0; binary.len()];
527 create_hashes(&[ref_array], &random_state, &mut ref_hashes).unwrap();
528
529 for (val, hash) in binary.iter().zip(binary_hashes.iter()) {
531 match val {
532 Some(_) => assert_ne!(*hash, 0),
533 None => assert_eq!(*hash, 0),
534 }
535 }
536
537 assert_eq!(binary_hashes, ref_hashes);
539
540 assert_eq!(binary[0], binary[5]);
542 assert_eq!(binary[4], binary[6]);
543
544 assert_ne!(binary[0], binary[2]);
546 }
547 };
548 }
549
550 create_hash_binary!(binary_array, BinaryArray);
551 create_hash_binary!(binary_view_array, BinaryViewArray);
552
553 #[test]
554 fn create_hashes_fixed_size_binary() -> Result<()> {
555 let input_arg = vec![vec![1, 2], vec![5, 6], vec![5, 6]];
556 let fixed_size_binary_array =
557 Arc::new(FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap());
558
559 let random_state = RandomState::with_seeds(0, 0, 0, 0);
560 let hashes_buff = &mut vec![0; fixed_size_binary_array.len()];
561 let hashes =
562 create_hashes(&[fixed_size_binary_array], &random_state, hashes_buff)?;
563 assert_eq!(hashes.len(), 3,);
564
565 Ok(())
566 }
567
568 macro_rules! create_hash_string {
569 ($NAME:ident, $ARRAY:ty) => {
570 #[cfg(not(feature = "force_hash_collisions"))]
571 #[test]
572 fn $NAME() {
573 let strings = [
574 Some("short"),
575 None,
576 Some("long but different 12 bytes string"),
577 Some("short2"),
578 Some("Longer than 12 bytes string"),
579 Some("short"),
580 Some("Longer than 12 bytes string"),
581 ];
582
583 let string_array = Arc::new(strings.iter().cloned().collect::<$ARRAY>());
584 let dict_array = Arc::new(
585 strings
586 .iter()
587 .cloned()
588 .collect::<DictionaryArray<Int8Type>>(),
589 );
590
591 let random_state = RandomState::with_seeds(0, 0, 0, 0);
592
593 let mut string_hashes = vec![0; strings.len()];
594 create_hashes(&[string_array], &random_state, &mut string_hashes)
595 .unwrap();
596
597 let mut dict_hashes = vec![0; strings.len()];
598 create_hashes(&[dict_array], &random_state, &mut dict_hashes).unwrap();
599
600 for (val, hash) in strings.iter().zip(string_hashes.iter()) {
602 match val {
603 Some(_) => assert_ne!(*hash, 0),
604 None => assert_eq!(*hash, 0),
605 }
606 }
607
608 assert_eq!(string_hashes, dict_hashes);
610
611 assert_eq!(strings[0], strings[5]);
613 assert_eq!(strings[4], strings[6]);
614
615 assert_ne!(strings[0], strings[2]);
617 }
618 };
619 }
620
621 create_hash_string!(string_array, StringArray);
622 create_hash_string!(large_string_array, LargeStringArray);
623 create_hash_string!(string_view_array, StringArray);
624 create_hash_string!(dict_string_array, DictionaryArray<Int8Type>);
625
626 #[test]
627 #[cfg(not(feature = "force_hash_collisions"))]
629 fn create_hashes_for_dict_arrays() {
630 let strings = [Some("foo"), None, Some("bar"), Some("foo"), None];
631
632 let string_array = Arc::new(strings.iter().cloned().collect::<StringArray>());
633 let dict_array = Arc::new(
634 strings
635 .iter()
636 .cloned()
637 .collect::<DictionaryArray<Int8Type>>(),
638 );
639
640 let random_state = RandomState::with_seeds(0, 0, 0, 0);
641
642 let mut string_hashes = vec![0; strings.len()];
643 create_hashes(&[string_array], &random_state, &mut string_hashes).unwrap();
644
645 let mut dict_hashes = vec![0; strings.len()];
646 create_hashes(&[dict_array], &random_state, &mut dict_hashes).unwrap();
647
648 for (val, hash) in strings.iter().zip(string_hashes.iter()) {
650 match val {
651 Some(_) => assert_ne!(*hash, 0),
652 None => assert_eq!(*hash, 0),
653 }
654 }
655
656 assert_eq!(string_hashes, dict_hashes);
658
659 assert_eq!(strings[1], strings[4]);
661 assert_eq!(dict_hashes[1], dict_hashes[4]);
662 assert_eq!(strings[0], strings[3]);
663 assert_eq!(dict_hashes[0], dict_hashes[3]);
664
665 assert_ne!(strings[0], strings[2]);
667 assert_ne!(dict_hashes[0], dict_hashes[2]);
668 }
669
670 #[test]
671 #[cfg(not(feature = "force_hash_collisions"))]
673 fn create_hashes_for_list_arrays() {
674 let data = vec![
675 Some(vec![Some(0), Some(1), Some(2)]),
676 None,
677 Some(vec![Some(3), None, Some(5)]),
678 Some(vec![Some(3), None, Some(5)]),
679 None,
680 Some(vec![Some(0), Some(1), Some(2)]),
681 Some(vec![]),
682 ];
683 let list_array =
684 Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(data)) as ArrayRef;
685 let random_state = RandomState::with_seeds(0, 0, 0, 0);
686 let mut hashes = vec![0; list_array.len()];
687 create_hashes(&[list_array], &random_state, &mut hashes).unwrap();
688 assert_eq!(hashes[0], hashes[5]);
689 assert_eq!(hashes[1], hashes[4]);
690 assert_eq!(hashes[2], hashes[3]);
691 assert_eq!(hashes[1], hashes[6]); }
693
694 #[test]
695 #[cfg(not(feature = "force_hash_collisions"))]
697 fn create_hashes_for_fixed_size_list_arrays() {
698 let data = vec![
699 Some(vec![Some(0), Some(1), Some(2)]),
700 None,
701 Some(vec![Some(3), None, Some(5)]),
702 Some(vec![Some(3), None, Some(5)]),
703 None,
704 Some(vec![Some(0), Some(1), Some(2)]),
705 ];
706 let list_array =
707 Arc::new(FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
708 data, 3,
709 )) as ArrayRef;
710 let random_state = RandomState::with_seeds(0, 0, 0, 0);
711 let mut hashes = vec![0; list_array.len()];
712 create_hashes(&[list_array], &random_state, &mut hashes).unwrap();
713 assert_eq!(hashes[0], hashes[5]);
714 assert_eq!(hashes[1], hashes[4]);
715 assert_eq!(hashes[2], hashes[3]);
716 }
717
718 #[test]
719 #[cfg(not(feature = "force_hash_collisions"))]
721 fn create_hashes_for_struct_arrays() {
722 use arrow::buffer::Buffer;
723
724 let boolarr = Arc::new(BooleanArray::from(vec![
725 false, false, true, true, true, true,
726 ]));
727 let i32arr = Arc::new(Int32Array::from(vec![10, 10, 20, 20, 30, 31]));
728
729 let struct_array = StructArray::from((
730 vec![
731 (
732 Arc::new(Field::new("bool", DataType::Boolean, false)),
733 Arc::clone(&boolarr) as ArrayRef,
734 ),
735 (
736 Arc::new(Field::new("i32", DataType::Int32, false)),
737 Arc::clone(&i32arr) as ArrayRef,
738 ),
739 (
740 Arc::new(Field::new("i32", DataType::Int32, false)),
741 Arc::clone(&i32arr) as ArrayRef,
742 ),
743 (
744 Arc::new(Field::new("bool", DataType::Boolean, false)),
745 Arc::clone(&boolarr) as ArrayRef,
746 ),
747 ],
748 Buffer::from(&[0b001011]),
749 ));
750
751 assert!(struct_array.is_valid(0));
752 assert!(struct_array.is_valid(1));
753 assert!(struct_array.is_null(2));
754 assert!(struct_array.is_valid(3));
755 assert!(struct_array.is_null(4));
756 assert!(struct_array.is_null(5));
757
758 let array = Arc::new(struct_array) as ArrayRef;
759
760 let random_state = RandomState::with_seeds(0, 0, 0, 0);
761 let mut hashes = vec![0; array.len()];
762 create_hashes(&[array], &random_state, &mut hashes).unwrap();
763 assert_eq!(hashes[0], hashes[1]);
764 assert_ne!(hashes[2], hashes[3]);
766 assert_eq!(hashes[4], hashes[5]);
768 }
769
770 #[test]
771 #[cfg(not(feature = "force_hash_collisions"))]
773 fn create_hashes_for_struct_arrays_more_column_than_row() {
774 let struct_array = StructArray::from(vec![
775 (
776 Arc::new(Field::new("bool", DataType::Boolean, false)),
777 Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
778 ),
779 (
780 Arc::new(Field::new("i32-1", DataType::Int32, false)),
781 Arc::new(Int32Array::from(vec![10, 10])) as ArrayRef,
782 ),
783 (
784 Arc::new(Field::new("i32-2", DataType::Int32, false)),
785 Arc::new(Int32Array::from(vec![10, 10])) as ArrayRef,
786 ),
787 (
788 Arc::new(Field::new("i32-3", DataType::Int32, false)),
789 Arc::new(Int32Array::from(vec![10, 10])) as ArrayRef,
790 ),
791 ]);
792
793 assert!(struct_array.is_valid(0));
794 assert!(struct_array.is_valid(1));
795
796 let array = Arc::new(struct_array) as ArrayRef;
797 let random_state = RandomState::with_seeds(0, 0, 0, 0);
798 let mut hashes = vec![0; array.len()];
799 create_hashes(&[array], &random_state, &mut hashes).unwrap();
800 assert_eq!(hashes[0], hashes[1]);
801 }
802
803 #[test]
804 #[cfg(not(feature = "force_hash_collisions"))]
806 fn create_hashes_for_map_arrays() {
807 let mut builder =
808 MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
809 builder.keys().append_value("key1");
811 builder.keys().append_value("key2");
812 builder.values().append_value(1);
813 builder.values().append_value(2);
814 builder.append(true).unwrap();
815 builder.keys().append_value("key1");
817 builder.keys().append_value("key2");
818 builder.values().append_value(1);
819 builder.values().append_value(2);
820 builder.append(true).unwrap();
821 builder.keys().append_value("key1");
823 builder.keys().append_value("key2");
824 builder.values().append_value(1);
825 builder.values().append_value(3);
826 builder.append(true).unwrap();
827 builder.keys().append_value("key1");
829 builder.keys().append_value("key3");
830 builder.values().append_value(1);
831 builder.values().append_value(2);
832 builder.append(true).unwrap();
833 builder.keys().append_value("key1");
835 builder.values().append_value(1);
836 builder.append(true).unwrap();
837 builder.keys().append_value("key1");
839 builder.values().append_null();
840 builder.append(true).unwrap();
841 builder.append(true).unwrap();
843 builder.keys().append_value("key1");
845 builder.values().append_value(1);
846 builder.append(false).unwrap();
847
848 let array = Arc::new(builder.finish()) as ArrayRef;
849
850 let random_state = RandomState::with_seeds(0, 0, 0, 0);
851 let mut hashes = vec![0; array.len()];
852 create_hashes(&[array], &random_state, &mut hashes).unwrap();
853 assert_eq!(hashes[0], hashes[1]); assert_ne!(hashes[0], hashes[2]); assert_ne!(hashes[0], hashes[3]); assert_ne!(hashes[0], hashes[4]); assert_ne!(hashes[4], hashes[5]); assert_eq!(hashes[6], hashes[7]); }
860
861 #[test]
862 #[cfg(not(feature = "force_hash_collisions"))]
864 fn create_multi_column_hash_for_dict_arrays() {
865 let strings1 = [Some("foo"), None, Some("bar")];
866 let strings2 = [Some("blarg"), Some("blah"), None];
867
868 let string_array = Arc::new(strings1.iter().cloned().collect::<StringArray>());
869 let dict_array = Arc::new(
870 strings2
871 .iter()
872 .cloned()
873 .collect::<DictionaryArray<Int32Type>>(),
874 );
875
876 let random_state = RandomState::with_seeds(0, 0, 0, 0);
877
878 let mut one_col_hashes = vec![0; strings1.len()];
879 create_hashes(
880 &[Arc::clone(&dict_array) as ArrayRef],
881 &random_state,
882 &mut one_col_hashes,
883 )
884 .unwrap();
885
886 let mut two_col_hashes = vec![0; strings1.len()];
887 create_hashes(
888 &[dict_array, string_array],
889 &random_state,
890 &mut two_col_hashes,
891 )
892 .unwrap();
893
894 assert_eq!(one_col_hashes.len(), 3);
895 assert_eq!(two_col_hashes.len(), 3);
896
897 assert_ne!(one_col_hashes, two_col_hashes);
898 }
899}