1use std::hash::{Hash, Hasher};
2
3use data_value::DataValue;
4use halfbrown::HashMap;
5
6use super::{column_store::ColumnFrame, Key};
7
8pub fn hash_datavalue(value: &DataValue) -> u64 {
10 use data_value::DataValue::*;
11
12 let mut hasher = std::collections::hash_map::DefaultHasher::new();
13
14 match value {
16 Null => hasher.write_u8(0),
17 Bool(b) => {
18 hasher.write_u8(1);
19 hasher.write_u8(*b as u8);
20 }
21 U8(u) => {
22 hasher.write_u8(2);
23 hasher.write_u8(*u);
24 }
25 U32(u) => {
26 hasher.write_u8(3);
27 hasher.write_u32(*u);
28 }
29 I32(i) => {
30 hasher.write_u8(4);
31 hasher.write_i32(*i);
32 }
33 U64(u) => {
34 hasher.write_u8(5);
35 hasher.write_u64(*u);
36 }
37 I64(i) => {
38 hasher.write_u8(6);
39 hasher.write_i64(*i);
40 }
41 F32(f) => {
42 hasher.write_u8(7);
43 hasher.write_u32(f.to_bits());
44 }
45 F64(f) => {
46 hasher.write_u8(8);
47 hasher.write_u64(f.to_bits());
48 }
49 U128(u) => {
50 hasher.write_u8(9);
51 hasher.write_u64((*u >> 64) as u64);
52 hasher.write_u64(*u as u64);
53 }
54 I128(i) => {
55 hasher.write_u8(10);
56 let u = *i as u128;
57 hasher.write_u64((u >> 64) as u64);
58 hasher.write_u64(u as u64);
59 }
60 String(s) => {
61 hasher.write_u8(11);
62 hasher.write(s.as_bytes());
63 }
64 Bytes(b) => {
65 hasher.write_u8(12);
66 hasher.write(b);
67 }
68 Vec(v) => {
69 hasher.write_u8(13);
70 hasher.write_usize(v.len());
71 for item in v {
72 let item_hash = hash_datavalue(item);
74 hasher.write_u64(item_hash);
75 }
76 }
77 Map(m) => {
78 hasher.write_u8(14);
79 hasher.write_usize(m.len());
80 let mut keys: std::vec::Vec<_> = m.keys().collect();
82 keys.sort();
83 for k in keys {
84 hasher.write(k.as_bytes());
85 if let Some(v) = m.get(k) {
86 let val_hash = hash_datavalue(v);
87 hasher.write_u64(val_hash);
88 }
89 }
90 }
91 EnumNumber(e) => {
92 hasher.write_u8(15);
93 hasher.write_i32(*e);
94 }
95 }
96
97 hasher.finish()
98}
99
100#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
101struct VecIndex {
102 hash: u64,
103}
104
105impl VecIndex {
106 pub fn new(value: &[DataValue]) -> Self {
107 let mut combined_hash: u64 = 0;
110 for v in value.iter() {
111 let h = hash_datavalue(v);
112 combined_hash = combined_hash.wrapping_mul(31).wrapping_add(h);
114 }
115 Self {
116 hash: combined_hash,
117 }
118 }
119}
120impl Hash for VecIndex {
121 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
122 state.write_u64(self.hash);
123 }
124}
125
126impl From<&[DataValue]> for VecIndex {
127 fn from(value: &[DataValue]) -> Self {
128 Self::new(value)
129 }
130}
131
132#[derive(Debug)]
134pub struct Index {
135 index: HashMap<VecIndex, Vec<usize>>,
136}
137
138impl Index {
139 pub fn new(key: Vec<Key>, df: &ColumnFrame) -> Self {
142 let selected = df.select(Some(key.as_slice()));
143 let mut this = Self {
144 index: HashMap::new(),
145 };
146
147 for (index, candidate) in selected.rows().into_iter().enumerate() {
148 if let Some(slice) = candidate.as_slice() {
149 this.index
150 .entry(VecIndex::from(slice))
151 .or_default()
152 .push(index);
153 }
154 }
155 this
156 }
157
158 pub fn get(&self, values: &[DataValue]) -> Option<&[usize]> {
160 self.index
161 .get(&VecIndex::from(values))
162 .map(|idx| idx.as_slice())
163 }
164
165 pub fn join(self, other: Index) -> Vec<(Vec<usize>, Vec<usize>)> {
167 let mut output = Vec::with_capacity(self.index.len());
168 for (index, left_index) in self.index.into_iter() {
169 if let Some(right_idx) = other.index.get(&index) {
170 output.push((left_index, right_idx.clone()));
171 }
172 }
173
174 output
175 }
176}
177
178#[cfg(test)]
179mod tests {
180 use super::*;
181 use data_value::DataValue;
182 use std::collections::HashMap;
183
184 #[test]
185 fn test_hash_datavalue_null() {
186 let val = DataValue::Null;
187 let hash1 = hash_datavalue(&val);
188 let hash2 = hash_datavalue(&val);
189 assert_eq!(hash1, hash2);
190 }
191
192 #[test]
193 fn test_hash_datavalue_bool() {
194 let true_val = DataValue::Bool(true);
195 let false_val = DataValue::Bool(false);
196
197 let hash_true = hash_datavalue(&true_val);
198 let hash_false = hash_datavalue(&false_val);
199
200 assert_eq!(hash_true, hash_datavalue(&true_val));
201 assert_eq!(hash_false, hash_datavalue(&false_val));
202 assert_ne!(hash_true, hash_false);
203 }
204
205 #[test]
206 fn test_hash_datavalue_u8() {
207 let val1 = DataValue::U8(0);
208 let val2 = DataValue::U8(255);
209 let val3 = DataValue::U8(42);
210
211 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
212 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
213 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
214 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
215 }
216
217 #[test]
218 fn test_hash_datavalue_u32() {
219 let val1 = DataValue::U32(0);
220 let val2 = DataValue::U32(u32::MAX);
221 let val3 = DataValue::U32(12345);
222
223 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
224 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
225 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
226 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
227 }
228
229 #[test]
230 fn test_hash_datavalue_i32() {
231 let val1 = DataValue::I32(0);
232 let val2 = DataValue::I32(-1);
233 let val3 = DataValue::I32(i32::MAX);
234 let val4 = DataValue::I32(i32::MIN);
235
236 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
237 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
238 assert_ne!(hash_datavalue(&val3), hash_datavalue(&val4));
239 }
240
241 #[test]
242 fn test_hash_datavalue_u64() {
243 let val1 = DataValue::U64(0);
244 let val2 = DataValue::U64(u64::MAX);
245 let val3 = DataValue::U64(123456789);
246
247 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
248 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
249 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
250 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
251 }
252
253 #[test]
254 fn test_hash_datavalue_i64() {
255 let val1 = DataValue::I64(0);
256 let val2 = DataValue::I64(-1);
257 let val3 = DataValue::I64(i64::MAX);
258 let val4 = DataValue::I64(i64::MIN);
259
260 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
261 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
262 assert_ne!(hash_datavalue(&val3), hash_datavalue(&val4));
263 }
264
265 #[test]
266 fn test_hash_datavalue_f32() {
267 let val1 = DataValue::F32(0.0);
268 let val2 = DataValue::F32(-0.0);
269 let val3 = DataValue::F32(3.14);
270 let val4 = DataValue::F32(f32::INFINITY);
271 let val5 = DataValue::F32(f32::NEG_INFINITY);
272 let val6 = DataValue::F32(f32::NAN);
273
274 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
275 assert_eq!(hash_datavalue(&val3), hash_datavalue(&val3));
276 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
277 assert_ne!(hash_datavalue(&val4), hash_datavalue(&val5));
278 assert_eq!(
279 hash_datavalue(&val6),
280 hash_datavalue(&DataValue::F32(f32::NAN))
281 );
282 }
283
284 #[test]
285 fn test_hash_datavalue_f64() {
286 let val1 = DataValue::F64(0.0);
287 let val2 = DataValue::F64(-0.0);
288 let val3 = DataValue::F64(3.14159);
289 let val4 = DataValue::F64(f64::INFINITY);
290 let val5 = DataValue::F64(f64::NEG_INFINITY);
291 let val6 = DataValue::F64(f64::NAN);
292
293 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
294 assert_eq!(hash_datavalue(&val3), hash_datavalue(&val3));
295 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
296 assert_ne!(hash_datavalue(&val4), hash_datavalue(&val5));
297 assert_eq!(
298 hash_datavalue(&val6),
299 hash_datavalue(&DataValue::F64(f64::NAN))
300 );
301 }
302
303 #[test]
304 fn test_hash_datavalue_u128() {
305 let val1 = DataValue::U128(0);
306 let val2 = DataValue::U128(u128::MAX);
307 let val3 = DataValue::U128(12345678901234567890);
308
309 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
310 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
311 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
312 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
313 }
314
315 #[test]
316 fn test_hash_datavalue_i128() {
317 let val1 = DataValue::I128(0);
318 let val2 = DataValue::I128(-1);
319 let val3 = DataValue::I128(i128::MAX);
320 let val4 = DataValue::I128(i128::MIN);
321
322 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
323 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
324 assert_ne!(hash_datavalue(&val3), hash_datavalue(&val4));
325 }
326
327 #[test]
328 fn test_hash_datavalue_string() {
329 let val1 = DataValue::String("hello".into());
330 let val2 = DataValue::String("world".into());
331 let val3 = DataValue::String("hello".into());
332 let val4 = DataValue::String("".into());
333
334 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val3));
335 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
336 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val4));
337 assert_eq!(hash_datavalue(&val4), hash_datavalue(&val4));
338 }
339
340 #[test]
341 fn test_hash_datavalue_bytes() {
342 let val1 = DataValue::Bytes(vec![1, 2, 3]);
343 let val2 = DataValue::Bytes(vec![1, 2, 3]);
344 let val3 = DataValue::Bytes(vec![3, 2, 1]);
345 let val4 = DataValue::Bytes(vec![]);
346
347 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
348 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
349 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val4));
350 assert_eq!(
351 hash_datavalue(&val4),
352 hash_datavalue(&DataValue::Bytes(vec![]))
353 );
354 }
355
356 #[test]
357 fn test_hash_datavalue_vec_empty() {
358 let val = DataValue::Vec(vec![]);
359 assert_eq!(hash_datavalue(&val), hash_datavalue(&val));
360 }
361
362 #[test]
363 fn test_hash_datavalue_vec_basic() {
364 let val1 = DataValue::Vec(vec![
365 DataValue::I32(1),
366 DataValue::I32(2),
367 DataValue::I32(3),
368 ]);
369 let val2 = DataValue::Vec(vec![
370 DataValue::I32(1),
371 DataValue::I32(2),
372 DataValue::I32(3),
373 ]);
374 let val3 = DataValue::Vec(vec![
375 DataValue::I32(3),
376 DataValue::I32(2),
377 DataValue::I32(1),
378 ]);
379
380 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
381 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
382 }
383
384 #[test]
385 fn test_hash_datavalue_vec_nested() {
386 let val1 = DataValue::Vec(vec![
387 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
388 DataValue::Vec(vec![DataValue::I32(3), DataValue::I32(4)]),
389 ]);
390 let val2 = DataValue::Vec(vec![
391 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
392 DataValue::Vec(vec![DataValue::I32(3), DataValue::I32(4)]),
393 ]);
394 let val3 = DataValue::Vec(vec![
395 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
396 DataValue::Vec(vec![DataValue::I32(3), DataValue::I32(5)]),
397 ]);
398
399 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
400 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
401 }
402
403 #[test]
404 fn test_hash_datavalue_vec_mixed_types() {
405 let val = DataValue::Vec(vec![
406 DataValue::I32(42),
407 DataValue::String("test".into()),
408 DataValue::Bool(true),
409 DataValue::Null,
410 ]);
411
412 assert_eq!(hash_datavalue(&val), hash_datavalue(&val));
413 }
414
415 #[test]
416 fn test_hash_datavalue_map_empty() {
417 let val = DataValue::Map(HashMap::new());
418 assert_eq!(hash_datavalue(&val), hash_datavalue(&val));
419 }
420
421 #[test]
422 fn test_hash_datavalue_map_basic() {
423 let mut map1 = HashMap::new();
424 map1.insert("key1".into(), DataValue::I32(1));
425 map1.insert("key2".into(), DataValue::I32(2));
426
427 let mut map2 = HashMap::new();
428 map2.insert("key1".into(), DataValue::I32(1));
429 map2.insert("key2".into(), DataValue::I32(2));
430
431 let val1 = DataValue::Map(map1);
432 let val2 = DataValue::Map(map2);
433
434 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
435 }
436
437 #[test]
438 fn test_hash_datavalue_map_insertion_order_independence() {
439 let mut map1 = HashMap::new();
440 map1.insert("a".into(), DataValue::I32(1));
441 map1.insert("b".into(), DataValue::I32(2));
442 map1.insert("c".into(), DataValue::I32(3));
443
444 let mut map2 = HashMap::new();
445 map2.insert("c".into(), DataValue::I32(3));
446 map2.insert("a".into(), DataValue::I32(1));
447 map2.insert("b".into(), DataValue::I32(2));
448
449 let val1 = DataValue::Map(map1);
450 let val2 = DataValue::Map(map2);
451
452 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
453 }
454
455 #[test]
456 fn test_hash_datavalue_map_different_values() {
457 let mut map1 = HashMap::new();
458 map1.insert("key".into(), DataValue::I32(1));
459
460 let mut map2 = HashMap::new();
461 map2.insert("key".into(), DataValue::I32(2));
462
463 let val1 = DataValue::Map(map1);
464 let val2 = DataValue::Map(map2);
465
466 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
467 }
468
469 #[test]
470 fn test_hash_datavalue_map_nested() {
471 let mut inner_map = HashMap::new();
472 inner_map.insert("inner".into(), DataValue::I32(42));
473
474 let mut outer_map = HashMap::new();
475 outer_map.insert("outer".into(), DataValue::Map(inner_map.clone()));
476
477 let mut outer_map2 = HashMap::new();
478 outer_map2.insert("outer".into(), DataValue::Map(inner_map));
479
480 let val1 = DataValue::Map(outer_map);
481 let val2 = DataValue::Map(outer_map2);
482
483 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
484 }
485
486 #[test]
487 fn test_hash_datavalue_enum_number() {
488 let val1 = DataValue::EnumNumber(0);
489 let val2 = DataValue::EnumNumber(1);
490 let val3 = DataValue::EnumNumber(-1);
491 let val4 = DataValue::EnumNumber(0);
492
493 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val4));
494 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
495 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
496 }
497
498 #[test]
499 fn test_hash_datavalue_type_differentiation() {
500 let u32_val = DataValue::U32(42);
501 let i32_val = DataValue::I32(42);
502 let u64_val = DataValue::U64(42);
503 let i64_val = DataValue::I64(42);
504 let f32_val = DataValue::F32(42.0);
505 let f64_val = DataValue::F64(42.0);
506
507 let u32_hash = hash_datavalue(&u32_val);
508 let i32_hash = hash_datavalue(&i32_val);
509 let u64_hash = hash_datavalue(&u64_val);
510 let i64_hash = hash_datavalue(&i64_val);
511 let f32_hash = hash_datavalue(&f32_val);
512 let f64_hash = hash_datavalue(&f64_val);
513
514 assert_ne!(u32_hash, i32_hash);
515 assert_ne!(u32_hash, u64_hash);
516 assert_ne!(u32_hash, i64_hash);
517 assert_ne!(i32_hash, u64_hash);
518 assert_ne!(i32_hash, i64_hash);
519 assert_ne!(u64_hash, i64_hash);
520 assert_ne!(f32_hash, f64_hash);
521 }
522
523 #[test]
524 fn test_hash_datavalue_null_vs_zero() {
525 let null_val = DataValue::Null;
526 let zero_i32 = DataValue::I32(0);
527 let zero_u32 = DataValue::U32(0);
528 let false_bool = DataValue::Bool(false);
529
530 let null_hash = hash_datavalue(&null_val);
531 let zero_i32_hash = hash_datavalue(&zero_i32);
532 let zero_u32_hash = hash_datavalue(&zero_u32);
533 let false_hash = hash_datavalue(&false_bool);
534
535 assert_ne!(null_hash, zero_i32_hash);
536 assert_ne!(null_hash, zero_u32_hash);
537 assert_ne!(null_hash, false_hash);
538 }
539
540 #[test]
541 fn test_hash_datavalue_string_vs_bytes() {
542 let string_val = DataValue::String("hello".into());
543 let bytes_val = DataValue::Bytes(b"hello".to_vec());
544
545 assert_ne!(hash_datavalue(&string_val), hash_datavalue(&bytes_val));
546 }
547
548 #[test]
549 fn test_hash_datavalue_consistency() {
550 let test_values = vec![
551 DataValue::Null,
552 DataValue::Bool(true),
553 DataValue::U8(255),
554 DataValue::I32(-42),
555 DataValue::String("test".into()),
556 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
557 ];
558
559 for val in test_values {
560 let hash1 = hash_datavalue(&val);
561 let hash2 = hash_datavalue(&val);
562 let hash3 = hash_datavalue(&val);
563 assert_eq!(hash1, hash2);
564 assert_eq!(hash2, hash3);
565 }
566 }
567}