1use std::hash::{Hash, Hasher};
2
3use data_value::DataValue;
4use halfbrown::HashMap;
5
6use super::{column_store::ColumnFrame, Key};
7
8fn hash_datavalue(value: &DataValue) -> u64 {
10 use data_value::DataValue::*;
11
12 let mut hasher = std::collections::hash_map::DefaultHasher::new();
13
14 match value {
16 Null => hasher.write_u8(0),
17 Bool(b) => {
18 hasher.write_u8(1);
19 hasher.write_u8(*b as u8);
20 }
21 U8(u) => {
22 hasher.write_u8(2);
23 hasher.write_u8(*u);
24 }
25 U32(u) => {
26 hasher.write_u8(3);
27 hasher.write_u32(*u);
28 }
29 I32(i) => {
30 hasher.write_u8(4);
31 hasher.write_i32(*i);
32 }
33 U64(u) => {
34 hasher.write_u8(5);
35 hasher.write_u64(*u);
36 }
37 I64(i) => {
38 hasher.write_u8(6);
39 hasher.write_i64(*i);
40 }
41 F32(f) => {
42 hasher.write_u8(7);
43 hasher.write_u32(f.to_bits());
44 }
45 F64(f) => {
46 hasher.write_u8(8);
47 hasher.write_u64(f.to_bits());
48 }
49 U128(u) => {
50 hasher.write_u8(9);
51 hasher.write_u64((*u >> 64) as u64);
52 hasher.write_u64(*u as u64);
53 }
54 I128(i) => {
55 hasher.write_u8(10);
56 let u = *i as u128;
57 hasher.write_u64((u >> 64) as u64);
58 hasher.write_u64(u as u64);
59 }
60 String(s) => {
61 hasher.write_u8(11);
62 hasher.write(s.as_bytes());
63 }
64 Bytes(b) => {
65 hasher.write_u8(12);
66 hasher.write(b);
67 }
68 Vec(v) => {
69 hasher.write_u8(13);
70 hasher.write_usize(v.len());
71 for item in v {
72 let item_hash = hash_datavalue(item);
74 hasher.write_u64(item_hash);
75 }
76 }
77 Map(m) => {
78 hasher.write_u8(14);
79 hasher.write_usize(m.len());
80 let mut keys: std::vec::Vec<_> = m.keys().collect();
82 keys.sort();
83 for k in keys {
84 hasher.write(k.as_bytes());
85 if let Some(v) = m.get(k) {
86 let val_hash = hash_datavalue(v);
87 hasher.write_u64(val_hash);
88 }
89 }
90 }
91 EnumNumber(e) => {
92 hasher.write_u8(15);
93 hasher.write_i32(*e);
94 }
95 }
96
97 hasher.finish()
98}
99
100#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
101struct VecIndex {
102 hash: u64,
103}
104
105impl VecIndex {
106 pub fn new(value: &[DataValue]) -> Self {
107 let mut combined_hash: u64 = 0;
110 for v in value.iter() {
111 let h = hash_datavalue(v);
112 combined_hash = combined_hash.wrapping_mul(31).wrapping_add(h);
114 }
115 Self {
116 hash: combined_hash,
117 }
118 }
119}
120impl Hash for VecIndex {
121 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
122 state.write_u64(self.hash);
123 }
124}
125
126impl From<&[DataValue]> for VecIndex {
127 fn from(value: &[DataValue]) -> Self {
128 Self::new(value)
129 }
130}
131
132#[derive(Debug)]
133pub struct Index {
134 index: HashMap<VecIndex, usize>,
135}
136
137impl Index {
138 pub fn new(key: Vec<Key>, df: &ColumnFrame) -> Self {
141 let selected = df.select(Some(key.as_slice()));
142 let mut this = Self {
143 index: HashMap::new(),
144 };
145
146 for (index, candidate) in selected.rows().into_iter().enumerate() {
147 if let Some(slice) = candidate.as_slice() {
148 this.index.insert(VecIndex::from(slice), index);
149 }
150 }
151 this
152 }
153
154 pub fn get(&self, values: &[DataValue]) -> Option<usize> {
155 self.index.get(&VecIndex::from(values)).cloned()
156 }
157
158 pub fn join(self, other: Index) -> Vec<(usize, Option<usize>)> {
159 let mut output = Vec::with_capacity(self.index.len());
160 for (index, left_index) in self.index.into_iter() {
161 let idx = other.index.get(&index);
162 output.push((left_index, idx.cloned()));
163 }
164
165 output
166 }
167}
168
169#[cfg(test)]
170mod tests {
171 use super::*;
172 use data_value::DataValue;
173 use std::collections::HashMap;
174
175 #[test]
176 fn test_hash_datavalue_null() {
177 let val = DataValue::Null;
178 let hash1 = hash_datavalue(&val);
179 let hash2 = hash_datavalue(&val);
180 assert_eq!(hash1, hash2);
181 }
182
183 #[test]
184 fn test_hash_datavalue_bool() {
185 let true_val = DataValue::Bool(true);
186 let false_val = DataValue::Bool(false);
187
188 let hash_true = hash_datavalue(&true_val);
189 let hash_false = hash_datavalue(&false_val);
190
191 assert_eq!(hash_true, hash_datavalue(&true_val));
192 assert_eq!(hash_false, hash_datavalue(&false_val));
193 assert_ne!(hash_true, hash_false);
194 }
195
196 #[test]
197 fn test_hash_datavalue_u8() {
198 let val1 = DataValue::U8(0);
199 let val2 = DataValue::U8(255);
200 let val3 = DataValue::U8(42);
201
202 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
203 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
204 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
205 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
206 }
207
208 #[test]
209 fn test_hash_datavalue_u32() {
210 let val1 = DataValue::U32(0);
211 let val2 = DataValue::U32(u32::MAX);
212 let val3 = DataValue::U32(12345);
213
214 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
215 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
216 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
217 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
218 }
219
220 #[test]
221 fn test_hash_datavalue_i32() {
222 let val1 = DataValue::I32(0);
223 let val2 = DataValue::I32(-1);
224 let val3 = DataValue::I32(i32::MAX);
225 let val4 = DataValue::I32(i32::MIN);
226
227 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
228 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
229 assert_ne!(hash_datavalue(&val3), hash_datavalue(&val4));
230 }
231
232 #[test]
233 fn test_hash_datavalue_u64() {
234 let val1 = DataValue::U64(0);
235 let val2 = DataValue::U64(u64::MAX);
236 let val3 = DataValue::U64(123456789);
237
238 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
239 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
240 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
241 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
242 }
243
244 #[test]
245 fn test_hash_datavalue_i64() {
246 let val1 = DataValue::I64(0);
247 let val2 = DataValue::I64(-1);
248 let val3 = DataValue::I64(i64::MAX);
249 let val4 = DataValue::I64(i64::MIN);
250
251 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
252 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
253 assert_ne!(hash_datavalue(&val3), hash_datavalue(&val4));
254 }
255
256 #[test]
257 fn test_hash_datavalue_f32() {
258 let val1 = DataValue::F32(0.0);
259 let val2 = DataValue::F32(-0.0);
260 let val3 = DataValue::F32(3.14);
261 let val4 = DataValue::F32(f32::INFINITY);
262 let val5 = DataValue::F32(f32::NEG_INFINITY);
263 let val6 = DataValue::F32(f32::NAN);
264
265 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
266 assert_eq!(hash_datavalue(&val3), hash_datavalue(&val3));
267 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
268 assert_ne!(hash_datavalue(&val4), hash_datavalue(&val5));
269 assert_eq!(
270 hash_datavalue(&val6),
271 hash_datavalue(&DataValue::F32(f32::NAN))
272 );
273 }
274
275 #[test]
276 fn test_hash_datavalue_f64() {
277 let val1 = DataValue::F64(0.0);
278 let val2 = DataValue::F64(-0.0);
279 let val3 = DataValue::F64(3.14159);
280 let val4 = DataValue::F64(f64::INFINITY);
281 let val5 = DataValue::F64(f64::NEG_INFINITY);
282 let val6 = DataValue::F64(f64::NAN);
283
284 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
285 assert_eq!(hash_datavalue(&val3), hash_datavalue(&val3));
286 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
287 assert_ne!(hash_datavalue(&val4), hash_datavalue(&val5));
288 assert_eq!(
289 hash_datavalue(&val6),
290 hash_datavalue(&DataValue::F64(f64::NAN))
291 );
292 }
293
294 #[test]
295 fn test_hash_datavalue_u128() {
296 let val1 = DataValue::U128(0);
297 let val2 = DataValue::U128(u128::MAX);
298 let val3 = DataValue::U128(12345678901234567890);
299
300 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
301 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
302 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
303 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
304 }
305
306 #[test]
307 fn test_hash_datavalue_i128() {
308 let val1 = DataValue::I128(0);
309 let val2 = DataValue::I128(-1);
310 let val3 = DataValue::I128(i128::MAX);
311 let val4 = DataValue::I128(i128::MIN);
312
313 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
314 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
315 assert_ne!(hash_datavalue(&val3), hash_datavalue(&val4));
316 }
317
318 #[test]
319 fn test_hash_datavalue_string() {
320 let val1 = DataValue::String("hello".into());
321 let val2 = DataValue::String("world".into());
322 let val3 = DataValue::String("hello".into());
323 let val4 = DataValue::String("".into());
324
325 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val3));
326 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
327 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val4));
328 assert_eq!(hash_datavalue(&val4), hash_datavalue(&val4));
329 }
330
331 #[test]
332 fn test_hash_datavalue_bytes() {
333 let val1 = DataValue::Bytes(vec![1, 2, 3]);
334 let val2 = DataValue::Bytes(vec![1, 2, 3]);
335 let val3 = DataValue::Bytes(vec![3, 2, 1]);
336 let val4 = DataValue::Bytes(vec![]);
337
338 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
339 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
340 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val4));
341 assert_eq!(
342 hash_datavalue(&val4),
343 hash_datavalue(&DataValue::Bytes(vec![]))
344 );
345 }
346
347 #[test]
348 fn test_hash_datavalue_vec_empty() {
349 let val = DataValue::Vec(vec![]);
350 assert_eq!(hash_datavalue(&val), hash_datavalue(&val));
351 }
352
353 #[test]
354 fn test_hash_datavalue_vec_basic() {
355 let val1 = DataValue::Vec(vec![
356 DataValue::I32(1),
357 DataValue::I32(2),
358 DataValue::I32(3),
359 ]);
360 let val2 = DataValue::Vec(vec![
361 DataValue::I32(1),
362 DataValue::I32(2),
363 DataValue::I32(3),
364 ]);
365 let val3 = DataValue::Vec(vec![
366 DataValue::I32(3),
367 DataValue::I32(2),
368 DataValue::I32(1),
369 ]);
370
371 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
372 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
373 }
374
375 #[test]
376 fn test_hash_datavalue_vec_nested() {
377 let val1 = DataValue::Vec(vec![
378 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
379 DataValue::Vec(vec![DataValue::I32(3), DataValue::I32(4)]),
380 ]);
381 let val2 = DataValue::Vec(vec![
382 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
383 DataValue::Vec(vec![DataValue::I32(3), DataValue::I32(4)]),
384 ]);
385 let val3 = DataValue::Vec(vec![
386 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
387 DataValue::Vec(vec![DataValue::I32(3), DataValue::I32(5)]),
388 ]);
389
390 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
391 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
392 }
393
394 #[test]
395 fn test_hash_datavalue_vec_mixed_types() {
396 let val = DataValue::Vec(vec![
397 DataValue::I32(42),
398 DataValue::String("test".into()),
399 DataValue::Bool(true),
400 DataValue::Null,
401 ]);
402
403 assert_eq!(hash_datavalue(&val), hash_datavalue(&val));
404 }
405
406 #[test]
407 fn test_hash_datavalue_map_empty() {
408 let val = DataValue::Map(HashMap::new());
409 assert_eq!(hash_datavalue(&val), hash_datavalue(&val));
410 }
411
412 #[test]
413 fn test_hash_datavalue_map_basic() {
414 let mut map1 = HashMap::new();
415 map1.insert("key1".into(), DataValue::I32(1));
416 map1.insert("key2".into(), DataValue::I32(2));
417
418 let mut map2 = HashMap::new();
419 map2.insert("key1".into(), DataValue::I32(1));
420 map2.insert("key2".into(), DataValue::I32(2));
421
422 let val1 = DataValue::Map(map1);
423 let val2 = DataValue::Map(map2);
424
425 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
426 }
427
428 #[test]
429 fn test_hash_datavalue_map_insertion_order_independence() {
430 let mut map1 = HashMap::new();
431 map1.insert("a".into(), DataValue::I32(1));
432 map1.insert("b".into(), DataValue::I32(2));
433 map1.insert("c".into(), DataValue::I32(3));
434
435 let mut map2 = HashMap::new();
436 map2.insert("c".into(), DataValue::I32(3));
437 map2.insert("a".into(), DataValue::I32(1));
438 map2.insert("b".into(), DataValue::I32(2));
439
440 let val1 = DataValue::Map(map1);
441 let val2 = DataValue::Map(map2);
442
443 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
444 }
445
446 #[test]
447 fn test_hash_datavalue_map_different_values() {
448 let mut map1 = HashMap::new();
449 map1.insert("key".into(), DataValue::I32(1));
450
451 let mut map2 = HashMap::new();
452 map2.insert("key".into(), DataValue::I32(2));
453
454 let val1 = DataValue::Map(map1);
455 let val2 = DataValue::Map(map2);
456
457 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
458 }
459
460 #[test]
461 fn test_hash_datavalue_map_nested() {
462 let mut inner_map = HashMap::new();
463 inner_map.insert("inner".into(), DataValue::I32(42));
464
465 let mut outer_map = HashMap::new();
466 outer_map.insert("outer".into(), DataValue::Map(inner_map.clone()));
467
468 let mut outer_map2 = HashMap::new();
469 outer_map2.insert("outer".into(), DataValue::Map(inner_map));
470
471 let val1 = DataValue::Map(outer_map);
472 let val2 = DataValue::Map(outer_map2);
473
474 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
475 }
476
477 #[test]
478 fn test_hash_datavalue_enum_number() {
479 let val1 = DataValue::EnumNumber(0);
480 let val2 = DataValue::EnumNumber(1);
481 let val3 = DataValue::EnumNumber(-1);
482 let val4 = DataValue::EnumNumber(0);
483
484 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val4));
485 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
486 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
487 }
488
489 #[test]
490 fn test_hash_datavalue_type_differentiation() {
491 let u32_val = DataValue::U32(42);
492 let i32_val = DataValue::I32(42);
493 let u64_val = DataValue::U64(42);
494 let i64_val = DataValue::I64(42);
495 let f32_val = DataValue::F32(42.0);
496 let f64_val = DataValue::F64(42.0);
497
498 let u32_hash = hash_datavalue(&u32_val);
499 let i32_hash = hash_datavalue(&i32_val);
500 let u64_hash = hash_datavalue(&u64_val);
501 let i64_hash = hash_datavalue(&i64_val);
502 let f32_hash = hash_datavalue(&f32_val);
503 let f64_hash = hash_datavalue(&f64_val);
504
505 assert_ne!(u32_hash, i32_hash);
506 assert_ne!(u32_hash, u64_hash);
507 assert_ne!(u32_hash, i64_hash);
508 assert_ne!(i32_hash, u64_hash);
509 assert_ne!(i32_hash, i64_hash);
510 assert_ne!(u64_hash, i64_hash);
511 assert_ne!(f32_hash, f64_hash);
512 }
513
514 #[test]
515 fn test_hash_datavalue_null_vs_zero() {
516 let null_val = DataValue::Null;
517 let zero_i32 = DataValue::I32(0);
518 let zero_u32 = DataValue::U32(0);
519 let false_bool = DataValue::Bool(false);
520
521 let null_hash = hash_datavalue(&null_val);
522 let zero_i32_hash = hash_datavalue(&zero_i32);
523 let zero_u32_hash = hash_datavalue(&zero_u32);
524 let false_hash = hash_datavalue(&false_bool);
525
526 assert_ne!(null_hash, zero_i32_hash);
527 assert_ne!(null_hash, zero_u32_hash);
528 assert_ne!(null_hash, false_hash);
529 }
530
531 #[test]
532 fn test_hash_datavalue_string_vs_bytes() {
533 let string_val = DataValue::String("hello".into());
534 let bytes_val = DataValue::Bytes(b"hello".to_vec());
535
536 assert_ne!(hash_datavalue(&string_val), hash_datavalue(&bytes_val));
537 }
538
539 #[test]
540 fn test_hash_datavalue_consistency() {
541 let test_values = vec![
542 DataValue::Null,
543 DataValue::Bool(true),
544 DataValue::U8(255),
545 DataValue::I32(-42),
546 DataValue::String("test".into()),
547 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
548 ];
549
550 for val in test_values {
551 let hash1 = hash_datavalue(&val);
552 let hash2 = hash_datavalue(&val);
553 let hash3 = hash_datavalue(&val);
554 assert_eq!(hash1, hash2);
555 assert_eq!(hash2, hash3);
556 }
557 }
558}