1use std::hash::{Hash, Hasher};
2
3use data_value::DataValue;
4use halfbrown::HashMap;
5
6use super::{column_store::ColumnFrame, Key};
7
8fn hash_datavalue(value: &DataValue) -> u64 {
10 use data_value::DataValue::*;
11
12 let mut hasher = std::collections::hash_map::DefaultHasher::new();
13
14 match value {
16 Null => hasher.write_u8(0),
17 Bool(b) => {
18 hasher.write_u8(1);
19 hasher.write_u8(*b as u8);
20 }
21 U8(u) => {
22 hasher.write_u8(2);
23 hasher.write_u8(*u);
24 }
25 U32(u) => {
26 hasher.write_u8(3);
27 hasher.write_u32(*u);
28 }
29 I32(i) => {
30 hasher.write_u8(4);
31 hasher.write_i32(*i);
32 }
33 U64(u) => {
34 hasher.write_u8(5);
35 hasher.write_u64(*u);
36 }
37 I64(i) => {
38 hasher.write_u8(6);
39 hasher.write_i64(*i);
40 }
41 F32(f) => {
42 hasher.write_u8(7);
43 hasher.write_u32(f.to_bits());
44 }
45 F64(f) => {
46 hasher.write_u8(8);
47 hasher.write_u64(f.to_bits());
48 }
49 U128(u) => {
50 hasher.write_u8(9);
51 hasher.write_u64((*u >> 64) as u64);
52 hasher.write_u64(*u as u64);
53 }
54 I128(i) => {
55 hasher.write_u8(10);
56 let u = *i as u128;
57 hasher.write_u64((u >> 64) as u64);
58 hasher.write_u64(u as u64);
59 }
60 String(s) => {
61 hasher.write_u8(11);
62 hasher.write(s.as_bytes());
63 }
64 Bytes(b) => {
65 hasher.write_u8(12);
66 hasher.write(b);
67 }
68 Vec(v) => {
69 hasher.write_u8(13);
70 hasher.write_usize(v.len());
71 for item in v {
72 let item_hash = hash_datavalue(item);
74 hasher.write_u64(item_hash);
75 }
76 }
77 Map(m) => {
78 hasher.write_u8(14);
79 hasher.write_usize(m.len());
80 let mut keys: std::vec::Vec<_> = m.keys().collect();
82 keys.sort();
83 for k in keys {
84 hasher.write(k.as_bytes());
85 if let Some(v) = m.get(k) {
86 let val_hash = hash_datavalue(v);
87 hasher.write_u64(val_hash);
88 }
89 }
90 }
91 EnumNumber(e) => {
92 hasher.write_u8(15);
93 hasher.write_i32(*e);
94 }
95 }
96
97 hasher.finish()
98}
99
100#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
101struct VecIndex {
102 hash: u64,
103}
104
105impl VecIndex {
106 pub fn new(value: &[DataValue]) -> Self {
107 let mut combined_hash: u64 = 0;
110 for v in value.iter() {
111 let h = hash_datavalue(v);
112 combined_hash = combined_hash.wrapping_mul(31).wrapping_add(h);
114 }
115 Self {
116 hash: combined_hash,
117 }
118 }
119}
120impl Hash for VecIndex {
121 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
122 state.write_u64(self.hash);
123 }
124}
125
126impl From<&[DataValue]> for VecIndex {
127 fn from(value: &[DataValue]) -> Self {
128 Self::new(value)
129 }
130}
131
132#[derive(Debug)]
133pub struct Index {
134 index: HashMap<VecIndex, Vec<usize>>,
135}
136
137impl Index {
138 pub fn new(key: Vec<Key>, df: &ColumnFrame) -> Self {
141 let selected = df.select(Some(key.as_slice()));
142 let mut this = Self {
143 index: HashMap::new(),
144 };
145
146 for (index, candidate) in selected.rows().into_iter().enumerate() {
147 if let Some(slice) = candidate.as_slice() {
148 this.index
149 .entry(VecIndex::from(slice))
150 .or_default()
151 .push(index);
152 }
153 }
154 this
155 }
156
157 pub fn get(&self, values: &[DataValue]) -> Option<&[usize]> {
158 self.index
159 .get(&VecIndex::from(values))
160 .map(|idx| idx.as_slice())
161 }
162
163 pub fn join(self, other: Index) -> Vec<(Vec<usize>, Vec<usize>)> {
164 let mut output = Vec::with_capacity(self.index.len());
165 for (index, left_index) in self.index.into_iter() {
166 if let Some(right_idx) = other.index.get(&index) {
167 output.push((left_index, right_idx.clone()));
168 }
169 }
170
171 output
172 }
173}
174
175#[cfg(test)]
176mod tests {
177 use super::*;
178 use data_value::DataValue;
179 use std::collections::HashMap;
180
181 #[test]
182 fn test_hash_datavalue_null() {
183 let val = DataValue::Null;
184 let hash1 = hash_datavalue(&val);
185 let hash2 = hash_datavalue(&val);
186 assert_eq!(hash1, hash2);
187 }
188
189 #[test]
190 fn test_hash_datavalue_bool() {
191 let true_val = DataValue::Bool(true);
192 let false_val = DataValue::Bool(false);
193
194 let hash_true = hash_datavalue(&true_val);
195 let hash_false = hash_datavalue(&false_val);
196
197 assert_eq!(hash_true, hash_datavalue(&true_val));
198 assert_eq!(hash_false, hash_datavalue(&false_val));
199 assert_ne!(hash_true, hash_false);
200 }
201
202 #[test]
203 fn test_hash_datavalue_u8() {
204 let val1 = DataValue::U8(0);
205 let val2 = DataValue::U8(255);
206 let val3 = DataValue::U8(42);
207
208 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
209 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
210 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
211 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
212 }
213
214 #[test]
215 fn test_hash_datavalue_u32() {
216 let val1 = DataValue::U32(0);
217 let val2 = DataValue::U32(u32::MAX);
218 let val3 = DataValue::U32(12345);
219
220 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
221 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
222 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
223 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
224 }
225
226 #[test]
227 fn test_hash_datavalue_i32() {
228 let val1 = DataValue::I32(0);
229 let val2 = DataValue::I32(-1);
230 let val3 = DataValue::I32(i32::MAX);
231 let val4 = DataValue::I32(i32::MIN);
232
233 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
234 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
235 assert_ne!(hash_datavalue(&val3), hash_datavalue(&val4));
236 }
237
238 #[test]
239 fn test_hash_datavalue_u64() {
240 let val1 = DataValue::U64(0);
241 let val2 = DataValue::U64(u64::MAX);
242 let val3 = DataValue::U64(123456789);
243
244 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
245 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
246 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
247 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
248 }
249
250 #[test]
251 fn test_hash_datavalue_i64() {
252 let val1 = DataValue::I64(0);
253 let val2 = DataValue::I64(-1);
254 let val3 = DataValue::I64(i64::MAX);
255 let val4 = DataValue::I64(i64::MIN);
256
257 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
258 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
259 assert_ne!(hash_datavalue(&val3), hash_datavalue(&val4));
260 }
261
262 #[test]
263 fn test_hash_datavalue_f32() {
264 let val1 = DataValue::F32(0.0);
265 let val2 = DataValue::F32(-0.0);
266 let val3 = DataValue::F32(3.14);
267 let val4 = DataValue::F32(f32::INFINITY);
268 let val5 = DataValue::F32(f32::NEG_INFINITY);
269 let val6 = DataValue::F32(f32::NAN);
270
271 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
272 assert_eq!(hash_datavalue(&val3), hash_datavalue(&val3));
273 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
274 assert_ne!(hash_datavalue(&val4), hash_datavalue(&val5));
275 assert_eq!(
276 hash_datavalue(&val6),
277 hash_datavalue(&DataValue::F32(f32::NAN))
278 );
279 }
280
281 #[test]
282 fn test_hash_datavalue_f64() {
283 let val1 = DataValue::F64(0.0);
284 let val2 = DataValue::F64(-0.0);
285 let val3 = DataValue::F64(3.14159);
286 let val4 = DataValue::F64(f64::INFINITY);
287 let val5 = DataValue::F64(f64::NEG_INFINITY);
288 let val6 = DataValue::F64(f64::NAN);
289
290 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
291 assert_eq!(hash_datavalue(&val3), hash_datavalue(&val3));
292 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
293 assert_ne!(hash_datavalue(&val4), hash_datavalue(&val5));
294 assert_eq!(
295 hash_datavalue(&val6),
296 hash_datavalue(&DataValue::F64(f64::NAN))
297 );
298 }
299
300 #[test]
301 fn test_hash_datavalue_u128() {
302 let val1 = DataValue::U128(0);
303 let val2 = DataValue::U128(u128::MAX);
304 let val3 = DataValue::U128(12345678901234567890);
305
306 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
307 assert_eq!(hash_datavalue(&val2), hash_datavalue(&val2));
308 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
309 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
310 }
311
312 #[test]
313 fn test_hash_datavalue_i128() {
314 let val1 = DataValue::I128(0);
315 let val2 = DataValue::I128(-1);
316 let val3 = DataValue::I128(i128::MAX);
317 let val4 = DataValue::I128(i128::MIN);
318
319 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val1));
320 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
321 assert_ne!(hash_datavalue(&val3), hash_datavalue(&val4));
322 }
323
324 #[test]
325 fn test_hash_datavalue_string() {
326 let val1 = DataValue::String("hello".into());
327 let val2 = DataValue::String("world".into());
328 let val3 = DataValue::String("hello".into());
329 let val4 = DataValue::String("".into());
330
331 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val3));
332 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
333 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val4));
334 assert_eq!(hash_datavalue(&val4), hash_datavalue(&val4));
335 }
336
337 #[test]
338 fn test_hash_datavalue_bytes() {
339 let val1 = DataValue::Bytes(vec![1, 2, 3]);
340 let val2 = DataValue::Bytes(vec![1, 2, 3]);
341 let val3 = DataValue::Bytes(vec![3, 2, 1]);
342 let val4 = DataValue::Bytes(vec![]);
343
344 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
345 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
346 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val4));
347 assert_eq!(
348 hash_datavalue(&val4),
349 hash_datavalue(&DataValue::Bytes(vec![]))
350 );
351 }
352
353 #[test]
354 fn test_hash_datavalue_vec_empty() {
355 let val = DataValue::Vec(vec![]);
356 assert_eq!(hash_datavalue(&val), hash_datavalue(&val));
357 }
358
359 #[test]
360 fn test_hash_datavalue_vec_basic() {
361 let val1 = DataValue::Vec(vec![
362 DataValue::I32(1),
363 DataValue::I32(2),
364 DataValue::I32(3),
365 ]);
366 let val2 = DataValue::Vec(vec![
367 DataValue::I32(1),
368 DataValue::I32(2),
369 DataValue::I32(3),
370 ]);
371 let val3 = DataValue::Vec(vec![
372 DataValue::I32(3),
373 DataValue::I32(2),
374 DataValue::I32(1),
375 ]);
376
377 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
378 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
379 }
380
381 #[test]
382 fn test_hash_datavalue_vec_nested() {
383 let val1 = DataValue::Vec(vec![
384 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
385 DataValue::Vec(vec![DataValue::I32(3), DataValue::I32(4)]),
386 ]);
387 let val2 = DataValue::Vec(vec![
388 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
389 DataValue::Vec(vec![DataValue::I32(3), DataValue::I32(4)]),
390 ]);
391 let val3 = DataValue::Vec(vec![
392 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
393 DataValue::Vec(vec![DataValue::I32(3), DataValue::I32(5)]),
394 ]);
395
396 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
397 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
398 }
399
400 #[test]
401 fn test_hash_datavalue_vec_mixed_types() {
402 let val = DataValue::Vec(vec![
403 DataValue::I32(42),
404 DataValue::String("test".into()),
405 DataValue::Bool(true),
406 DataValue::Null,
407 ]);
408
409 assert_eq!(hash_datavalue(&val), hash_datavalue(&val));
410 }
411
412 #[test]
413 fn test_hash_datavalue_map_empty() {
414 let val = DataValue::Map(HashMap::new());
415 assert_eq!(hash_datavalue(&val), hash_datavalue(&val));
416 }
417
418 #[test]
419 fn test_hash_datavalue_map_basic() {
420 let mut map1 = HashMap::new();
421 map1.insert("key1".into(), DataValue::I32(1));
422 map1.insert("key2".into(), DataValue::I32(2));
423
424 let mut map2 = HashMap::new();
425 map2.insert("key1".into(), DataValue::I32(1));
426 map2.insert("key2".into(), DataValue::I32(2));
427
428 let val1 = DataValue::Map(map1);
429 let val2 = DataValue::Map(map2);
430
431 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
432 }
433
434 #[test]
435 fn test_hash_datavalue_map_insertion_order_independence() {
436 let mut map1 = HashMap::new();
437 map1.insert("a".into(), DataValue::I32(1));
438 map1.insert("b".into(), DataValue::I32(2));
439 map1.insert("c".into(), DataValue::I32(3));
440
441 let mut map2 = HashMap::new();
442 map2.insert("c".into(), DataValue::I32(3));
443 map2.insert("a".into(), DataValue::I32(1));
444 map2.insert("b".into(), DataValue::I32(2));
445
446 let val1 = DataValue::Map(map1);
447 let val2 = DataValue::Map(map2);
448
449 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
450 }
451
452 #[test]
453 fn test_hash_datavalue_map_different_values() {
454 let mut map1 = HashMap::new();
455 map1.insert("key".into(), DataValue::I32(1));
456
457 let mut map2 = HashMap::new();
458 map2.insert("key".into(), DataValue::I32(2));
459
460 let val1 = DataValue::Map(map1);
461 let val2 = DataValue::Map(map2);
462
463 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
464 }
465
466 #[test]
467 fn test_hash_datavalue_map_nested() {
468 let mut inner_map = HashMap::new();
469 inner_map.insert("inner".into(), DataValue::I32(42));
470
471 let mut outer_map = HashMap::new();
472 outer_map.insert("outer".into(), DataValue::Map(inner_map.clone()));
473
474 let mut outer_map2 = HashMap::new();
475 outer_map2.insert("outer".into(), DataValue::Map(inner_map));
476
477 let val1 = DataValue::Map(outer_map);
478 let val2 = DataValue::Map(outer_map2);
479
480 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val2));
481 }
482
483 #[test]
484 fn test_hash_datavalue_enum_number() {
485 let val1 = DataValue::EnumNumber(0);
486 let val2 = DataValue::EnumNumber(1);
487 let val3 = DataValue::EnumNumber(-1);
488 let val4 = DataValue::EnumNumber(0);
489
490 assert_eq!(hash_datavalue(&val1), hash_datavalue(&val4));
491 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val2));
492 assert_ne!(hash_datavalue(&val1), hash_datavalue(&val3));
493 }
494
495 #[test]
496 fn test_hash_datavalue_type_differentiation() {
497 let u32_val = DataValue::U32(42);
498 let i32_val = DataValue::I32(42);
499 let u64_val = DataValue::U64(42);
500 let i64_val = DataValue::I64(42);
501 let f32_val = DataValue::F32(42.0);
502 let f64_val = DataValue::F64(42.0);
503
504 let u32_hash = hash_datavalue(&u32_val);
505 let i32_hash = hash_datavalue(&i32_val);
506 let u64_hash = hash_datavalue(&u64_val);
507 let i64_hash = hash_datavalue(&i64_val);
508 let f32_hash = hash_datavalue(&f32_val);
509 let f64_hash = hash_datavalue(&f64_val);
510
511 assert_ne!(u32_hash, i32_hash);
512 assert_ne!(u32_hash, u64_hash);
513 assert_ne!(u32_hash, i64_hash);
514 assert_ne!(i32_hash, u64_hash);
515 assert_ne!(i32_hash, i64_hash);
516 assert_ne!(u64_hash, i64_hash);
517 assert_ne!(f32_hash, f64_hash);
518 }
519
520 #[test]
521 fn test_hash_datavalue_null_vs_zero() {
522 let null_val = DataValue::Null;
523 let zero_i32 = DataValue::I32(0);
524 let zero_u32 = DataValue::U32(0);
525 let false_bool = DataValue::Bool(false);
526
527 let null_hash = hash_datavalue(&null_val);
528 let zero_i32_hash = hash_datavalue(&zero_i32);
529 let zero_u32_hash = hash_datavalue(&zero_u32);
530 let false_hash = hash_datavalue(&false_bool);
531
532 assert_ne!(null_hash, zero_i32_hash);
533 assert_ne!(null_hash, zero_u32_hash);
534 assert_ne!(null_hash, false_hash);
535 }
536
537 #[test]
538 fn test_hash_datavalue_string_vs_bytes() {
539 let string_val = DataValue::String("hello".into());
540 let bytes_val = DataValue::Bytes(b"hello".to_vec());
541
542 assert_ne!(hash_datavalue(&string_val), hash_datavalue(&bytes_val));
543 }
544
545 #[test]
546 fn test_hash_datavalue_consistency() {
547 let test_values = vec![
548 DataValue::Null,
549 DataValue::Bool(true),
550 DataValue::U8(255),
551 DataValue::I32(-42),
552 DataValue::String("test".into()),
553 DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
554 ];
555
556 for val in test_values {
557 let hash1 = hash_datavalue(&val);
558 let hash2 = hash_datavalue(&val);
559 let hash3 = hash_datavalue(&val);
560 assert_eq!(hash1, hash2);
561 assert_eq!(hash2, hash3);
562 }
563 }
564}