use std::hash::{Hash, Hasher};
use data_value::DataValue;
use halfbrown::HashMap;
use super::{column_store::ColumnFrame, Key};
pub fn hash_datavalue(value: &DataValue) -> u64 {
use data_value::DataValue::*;
let mut hasher = std::collections::hash_map::DefaultHasher::new();
match value {
Null => hasher.write_u8(0),
Bool(b) => {
hasher.write_u8(1);
hasher.write_u8(*b as u8);
}
U8(u) => {
hasher.write_u8(2);
hasher.write_u8(*u);
}
U32(u) => {
hasher.write_u8(3);
hasher.write_u32(*u);
}
I32(i) => {
hasher.write_u8(4);
hasher.write_i32(*i);
}
U64(u) => {
hasher.write_u8(5);
hasher.write_u64(*u);
}
I64(i) => {
hasher.write_u8(6);
hasher.write_i64(*i);
}
F32(f) => {
hasher.write_u8(7);
hasher.write_u32(f.to_bits());
}
F64(f) => {
hasher.write_u8(8);
hasher.write_u64(f.to_bits());
}
U128(u) => {
hasher.write_u8(9);
hasher.write_u64((*u >> 64) as u64);
hasher.write_u64(*u as u64);
}
I128(i) => {
hasher.write_u8(10);
let u = *i as u128;
hasher.write_u64((u >> 64) as u64);
hasher.write_u64(u as u64);
}
String(s) => {
hasher.write_u8(11);
hasher.write(s.as_bytes());
}
Bytes(b) => {
hasher.write_u8(12);
hasher.write(b);
}
Vec(v) => {
hasher.write_u8(13);
hasher.write_usize(v.len());
for item in v {
let item_hash = hash_datavalue(item);
hasher.write_u64(item_hash);
}
}
Map(m) => {
hasher.write_u8(14);
hasher.write_usize(m.len());
let mut keys: std::vec::Vec<_> = m.keys().collect();
keys.sort();
for k in keys {
hasher.write(k.as_bytes());
if let Some(v) = m.get(k) {
let val_hash = hash_datavalue(v);
hasher.write_u64(val_hash);
}
}
}
EnumNumber(e) => {
hasher.write_u8(15);
hasher.write_i32(*e);
}
}
hasher.finish()
}
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct VecIndex {
hash: u64,
}
impl VecIndex {
pub fn new(value: &[DataValue]) -> Self {
let mut combined_hash: u64 = 0;
for v in value.iter() {
let h = hash_datavalue(v);
combined_hash = combined_hash.wrapping_mul(31).wrapping_add(h);
}
Self {
hash: combined_hash,
}
}
}
impl Hash for VecIndex {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
state.write_u64(self.hash);
}
}
impl From<&[DataValue]> for VecIndex {
fn from(value: &[DataValue]) -> Self {
Self::new(value)
}
}
/// Lookup table from a hashed multi-column key to the positions of the rows
/// that carry that key.
///
/// Keys are stored as [`VecIndex`], i.e. by hash only; the original values are
/// not kept.
#[derive(Debug)]
pub struct Index {
    /// Row positions grouped by key, in the order the rows were visited.
    index: HashMap<VecIndex, Vec<usize>>,
}

impl Index {
    /// Builds the index of `df` over the columns named by `key`.
    ///
    /// The frame is narrowed with `df.select(Some(..))` and its rows are
    /// enumerated; each row's position is appended to the bucket of its key.
    /// Rows whose `as_slice()` returns `None` are skipped (their position is
    /// still counted, so later rows keep their enumeration index).
    pub fn new(key: Vec<Key>, df: &ColumnFrame) -> Self {
        let selected = df.select(Some(key.as_slice()));
        let mut index: HashMap<VecIndex, Vec<usize>> = HashMap::new();
        for (position, candidate) in selected.rows().into_iter().enumerate() {
            if let Some(slice) = candidate.as_slice() {
                index
                    .entry(VecIndex::from(slice))
                    .or_default()
                    .push(position);
            }
        }
        Self { index }
    }

    /// Returns the positions of all rows indexed under `values`, or `None`
    /// when no row has that key.
    pub fn get(&self, values: &[DataValue]) -> Option<&[usize]> {
        self.index.get(&VecIndex::from(values)).map(Vec::as_slice)
    }

    /// Joins two indexes on their keys.
    ///
    /// Consumes both indexes and returns one `(left_rows, right_rows)` pair
    /// for every key present in both, following the iteration order of
    /// `self`'s map. Keys found on only one side are dropped.
    pub fn join(self, mut other: Index) -> Vec<(Vec<usize>, Vec<usize>)> {
        // There can never be more matches than keys on the smaller side.
        let capacity = self.index.len().min(other.index.len());
        let mut output = Vec::with_capacity(capacity);
        for (key, left_rows) in self.index.into_iter() {
            // `other` is owned and each key of `self` is unique, so the
            // right-hand rows can be moved out instead of cloned.
            if let Some(right_rows) = other.index.remove(&key) {
                output.push((left_rows, right_rows));
            }
        }
        output
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for [`hash_datavalue`]: repeatability, sensitivity to the
    //! payload, and separation between variants.

    use super::*;
    use data_value::DataValue;
    use std::collections::HashMap;

    /// Shorthand for the function under test.
    fn h(value: &DataValue) -> u64 {
        hash_datavalue(value)
    }

    /// Builds a `DataValue::Vec` of `I32` elements.
    fn ints(items: &[i32]) -> DataValue {
        DataValue::Vec(items.iter().map(|&n| DataValue::I32(n)).collect())
    }

    /// Builds a `DataValue::Map`, inserting `entries` in the given order.
    fn map_of(entries: Vec<(&str, DataValue)>) -> DataValue {
        let mut map = HashMap::new();
        for (key, item) in entries {
            map.insert(key.into(), item);
        }
        DataValue::Map(map)
    }

    #[test]
    fn test_hash_datavalue_null() {
        let null = DataValue::Null;
        assert_eq!(h(&null), h(&null));
    }

    #[test]
    fn test_hash_datavalue_bool() {
        let (yes, no) = (DataValue::Bool(true), DataValue::Bool(false));
        let (yes_hash, no_hash) = (h(&yes), h(&no));
        assert_eq!(yes_hash, h(&yes));
        assert_eq!(no_hash, h(&no));
        assert_ne!(yes_hash, no_hash);
    }

    #[test]
    fn test_hash_datavalue_u8() {
        let zero = DataValue::U8(0);
        let max = DataValue::U8(255);
        let other = DataValue::U8(42);
        assert_eq!(h(&zero), h(&zero));
        assert_eq!(h(&max), h(&max));
        assert_ne!(h(&zero), h(&max));
        assert_ne!(h(&zero), h(&other));
    }

    #[test]
    fn test_hash_datavalue_u32() {
        let zero = DataValue::U32(0);
        let max = DataValue::U32(u32::MAX);
        let other = DataValue::U32(12345);
        assert_eq!(h(&zero), h(&zero));
        assert_eq!(h(&max), h(&max));
        assert_ne!(h(&zero), h(&max));
        assert_ne!(h(&zero), h(&other));
    }

    #[test]
    fn test_hash_datavalue_i32() {
        let zero = DataValue::I32(0);
        let minus_one = DataValue::I32(-1);
        let max = DataValue::I32(i32::MAX);
        let min = DataValue::I32(i32::MIN);
        assert_eq!(h(&zero), h(&zero));
        assert_ne!(h(&zero), h(&minus_one));
        assert_ne!(h(&max), h(&min));
    }

    #[test]
    fn test_hash_datavalue_u64() {
        let zero = DataValue::U64(0);
        let max = DataValue::U64(u64::MAX);
        let other = DataValue::U64(123456789);
        assert_eq!(h(&zero), h(&zero));
        assert_eq!(h(&max), h(&max));
        assert_ne!(h(&zero), h(&max));
        assert_ne!(h(&zero), h(&other));
    }

    #[test]
    fn test_hash_datavalue_i64() {
        let zero = DataValue::I64(0);
        let minus_one = DataValue::I64(-1);
        let max = DataValue::I64(i64::MAX);
        let min = DataValue::I64(i64::MIN);
        assert_eq!(h(&zero), h(&zero));
        assert_ne!(h(&zero), h(&minus_one));
        assert_ne!(h(&max), h(&min));
    }

    #[test]
    fn test_hash_datavalue_f32() {
        let pos_zero = DataValue::F32(0.0);
        let neg_zero = DataValue::F32(-0.0);
        let ordinary = DataValue::F32(3.14);
        let pos_inf = DataValue::F32(f32::INFINITY);
        let neg_inf = DataValue::F32(f32::NEG_INFINITY);
        let nan = DataValue::F32(f32::NAN);
        assert_eq!(h(&pos_zero), h(&pos_zero));
        assert_eq!(h(&ordinary), h(&ordinary));
        // Signed zeros have different bit patterns and must not collide.
        assert_ne!(h(&pos_zero), h(&neg_zero));
        assert_ne!(h(&pos_inf), h(&neg_inf));
        assert_eq!(h(&nan), h(&DataValue::F32(f32::NAN)));
    }

    #[test]
    fn test_hash_datavalue_f64() {
        let pos_zero = DataValue::F64(0.0);
        let neg_zero = DataValue::F64(-0.0);
        let ordinary = DataValue::F64(3.14159);
        let pos_inf = DataValue::F64(f64::INFINITY);
        let neg_inf = DataValue::F64(f64::NEG_INFINITY);
        let nan = DataValue::F64(f64::NAN);
        assert_eq!(h(&pos_zero), h(&pos_zero));
        assert_eq!(h(&ordinary), h(&ordinary));
        // Signed zeros have different bit patterns and must not collide.
        assert_ne!(h(&pos_zero), h(&neg_zero));
        assert_ne!(h(&pos_inf), h(&neg_inf));
        assert_eq!(h(&nan), h(&DataValue::F64(f64::NAN)));
    }

    #[test]
    fn test_hash_datavalue_u128() {
        let zero = DataValue::U128(0);
        let max = DataValue::U128(u128::MAX);
        let other = DataValue::U128(12345678901234567890);
        assert_eq!(h(&zero), h(&zero));
        assert_eq!(h(&max), h(&max));
        assert_ne!(h(&zero), h(&max));
        assert_ne!(h(&zero), h(&other));
    }

    #[test]
    fn test_hash_datavalue_i128() {
        let zero = DataValue::I128(0);
        let minus_one = DataValue::I128(-1);
        let max = DataValue::I128(i128::MAX);
        let min = DataValue::I128(i128::MIN);
        assert_eq!(h(&zero), h(&zero));
        assert_ne!(h(&zero), h(&minus_one));
        assert_ne!(h(&max), h(&min));
    }

    #[test]
    fn test_hash_datavalue_string() {
        let hello = DataValue::String("hello".into());
        let world = DataValue::String("world".into());
        let hello_again = DataValue::String("hello".into());
        let empty = DataValue::String("".into());
        assert_eq!(h(&hello), h(&hello_again));
        assert_ne!(h(&hello), h(&world));
        assert_ne!(h(&hello), h(&empty));
        assert_eq!(h(&empty), h(&empty));
    }

    #[test]
    fn test_hash_datavalue_bytes() {
        let ascending = DataValue::Bytes(vec![1, 2, 3]);
        let ascending_again = DataValue::Bytes(vec![1, 2, 3]);
        let descending = DataValue::Bytes(vec![3, 2, 1]);
        let empty = DataValue::Bytes(vec![]);
        assert_eq!(h(&ascending), h(&ascending_again));
        assert_ne!(h(&ascending), h(&descending));
        assert_ne!(h(&ascending), h(&empty));
        assert_eq!(h(&empty), h(&DataValue::Bytes(vec![])));
    }

    #[test]
    fn test_hash_datavalue_vec_empty() {
        let empty = DataValue::Vec(vec![]);
        assert_eq!(h(&empty), h(&empty));
    }

    #[test]
    fn test_hash_datavalue_vec_basic() {
        let ascending = ints(&[1, 2, 3]);
        let ascending_again = ints(&[1, 2, 3]);
        let descending = ints(&[3, 2, 1]);
        assert_eq!(h(&ascending), h(&ascending_again));
        // Element order is part of the hash.
        assert_ne!(h(&ascending), h(&descending));
    }

    #[test]
    fn test_hash_datavalue_vec_nested() {
        let nested = DataValue::Vec(vec![ints(&[1, 2]), ints(&[3, 4])]);
        let nested_again = DataValue::Vec(vec![ints(&[1, 2]), ints(&[3, 4])]);
        let tweaked = DataValue::Vec(vec![ints(&[1, 2]), ints(&[3, 5])]);
        assert_eq!(h(&nested), h(&nested_again));
        assert_ne!(h(&nested), h(&tweaked));
    }

    #[test]
    fn test_hash_datavalue_vec_mixed_types() {
        let mixed = DataValue::Vec(vec![
            DataValue::I32(42),
            DataValue::String("test".into()),
            DataValue::Bool(true),
            DataValue::Null,
        ]);
        assert_eq!(h(&mixed), h(&mixed));
    }

    #[test]
    fn test_hash_datavalue_map_empty() {
        let empty = DataValue::Map(HashMap::new());
        assert_eq!(h(&empty), h(&empty));
    }

    #[test]
    fn test_hash_datavalue_map_basic() {
        let first = map_of(vec![("key1", DataValue::I32(1)), ("key2", DataValue::I32(2))]);
        let second = map_of(vec![("key1", DataValue::I32(1)), ("key2", DataValue::I32(2))]);
        assert_eq!(h(&first), h(&second));
    }

    #[test]
    fn test_hash_datavalue_map_insertion_order_independence() {
        let abc = map_of(vec![
            ("a", DataValue::I32(1)),
            ("b", DataValue::I32(2)),
            ("c", DataValue::I32(3)),
        ]);
        let cab = map_of(vec![
            ("c", DataValue::I32(3)),
            ("a", DataValue::I32(1)),
            ("b", DataValue::I32(2)),
        ]);
        assert_eq!(h(&abc), h(&cab));
    }

    #[test]
    fn test_hash_datavalue_map_different_values() {
        let with_one = map_of(vec![("key", DataValue::I32(1))]);
        let with_two = map_of(vec![("key", DataValue::I32(2))]);
        assert_ne!(h(&with_one), h(&with_two));
    }

    #[test]
    fn test_hash_datavalue_map_nested() {
        let inner = map_of(vec![("inner", DataValue::I32(42))]);
        let outer = map_of(vec![("outer", inner.clone())]);
        let outer_again = map_of(vec![("outer", inner)]);
        assert_eq!(h(&outer), h(&outer_again));
    }

    #[test]
    fn test_hash_datavalue_enum_number() {
        let zero = DataValue::EnumNumber(0);
        let one = DataValue::EnumNumber(1);
        let minus_one = DataValue::EnumNumber(-1);
        let zero_again = DataValue::EnumNumber(0);
        assert_eq!(h(&zero), h(&zero_again));
        assert_ne!(h(&zero), h(&one));
        assert_ne!(h(&zero), h(&minus_one));
    }

    #[test]
    fn test_hash_datavalue_type_differentiation() {
        let u32_hash = h(&DataValue::U32(42));
        let i32_hash = h(&DataValue::I32(42));
        let u64_hash = h(&DataValue::U64(42));
        let i64_hash = h(&DataValue::I64(42));
        let f32_hash = h(&DataValue::F32(42.0));
        let f64_hash = h(&DataValue::F64(42.0));

        // The same number stored under different variants must not collide.
        let distinct_pairs = [
            (u32_hash, i32_hash),
            (u32_hash, u64_hash),
            (u32_hash, i64_hash),
            (i32_hash, u64_hash),
            (i32_hash, i64_hash),
            (u64_hash, i64_hash),
            (f32_hash, f64_hash),
        ];
        for &(left, right) in &distinct_pairs {
            assert_ne!(left, right);
        }
    }

    #[test]
    fn test_hash_datavalue_null_vs_zero() {
        let null_hash = h(&DataValue::Null);
        let zero_i32_hash = h(&DataValue::I32(0));
        let zero_u32_hash = h(&DataValue::U32(0));
        let false_hash = h(&DataValue::Bool(false));
        assert_ne!(null_hash, zero_i32_hash);
        assert_ne!(null_hash, zero_u32_hash);
        assert_ne!(null_hash, false_hash);
    }

    #[test]
    fn test_hash_datavalue_string_vs_bytes() {
        let text = DataValue::String("hello".into());
        let raw = DataValue::Bytes(b"hello".to_vec());
        assert_ne!(h(&text), h(&raw));
    }

    #[test]
    fn test_hash_datavalue_consistency() {
        let samples = vec![
            DataValue::Null,
            DataValue::Bool(true),
            DataValue::U8(255),
            DataValue::I32(-42),
            DataValue::String("test".into()),
            DataValue::Vec(vec![DataValue::I32(1), DataValue::I32(2)]),
        ];
        for sample in &samples {
            let runs = [h(sample), h(sample), h(sample)];
            assert_eq!(runs[0], runs[1]);
            assert_eq!(runs[1], runs[2]);
        }
    }
}