use crate::codec::SketchBytes;
use crate::codec::SketchSlice;
use crate::error::Error;
use crate::hll::HllType;
use crate::hll::KEY_MASK_26;
use crate::hll::container::COUPON_EMPTY;
use crate::hll::container::Container;
use crate::hll::serialization::*;
#[derive(Debug, Clone, PartialEq)]
pub struct HashSet {
container: Container,
}
impl Default for HashSet {
fn default() -> Self {
const LG_INIT_SET_SIZE: usize = 5;
Self::new(LG_INIT_SET_SIZE)
}
}
impl HashSet {
pub fn new(lg_size: usize) -> Self {
Self {
container: Container::new(lg_size),
}
}
pub fn update(&mut self, coupon: u32) {
let mask = (1 << self.container.lg_size()) - 1;
let mut probe = coupon & mask;
let starting_position = probe;
loop {
let value = &mut self.container.coupons[probe as usize];
if value == &COUPON_EMPTY {
*value = coupon;
self.container.len += 1;
break;
} else if value == &coupon {
break;
}
let stride = ((coupon & KEY_MASK_26) >> self.container.lg_size()) | 1;
probe = (probe + stride) & mask;
if probe == starting_position {
unreachable!("HashSet full; no empty slots");
}
}
}
pub fn container(&self) -> &Container {
&self.container
}
pub fn deserialize(
mut cursor: SketchSlice,
lg_arr: usize,
compact: bool,
) -> Result<Self, Error> {
let coupon_count = cursor
.read_u32_le()
.map_err(|_| Error::insufficient_data("coupon_count"))?;
let coupon_count = coupon_count as usize;
if compact {
let mut hash_set = HashSet::new(lg_arr);
for i in 0..coupon_count {
let coupon = cursor.read_u32_le().map_err(|_| {
Error::insufficient_data(format!(
"expected {coupon_count} coupons, failed at index {i}"
))
})?;
hash_set.update(coupon);
}
Ok(hash_set)
} else {
let array_size = 1 << lg_arr;
let mut coupons = vec![0u32; array_size];
for (i, coupon) in coupons.iter_mut().enumerate() {
*coupon = cursor.read_u32_le().map_err(|_| {
Error::insufficient_data(format!(
"expected {array_size} coupons, failed at index {i}"
))
})?;
}
Ok(Self {
container: Container::from_coupons(
lg_arr,
coupons.into_boxed_slice(),
coupon_count,
),
})
}
}
pub fn serialize(&self, lg_config_k: u8, hll_type: HllType) -> Vec<u8> {
let compact = true; let coupon_count = self.container.len();
let lg_arr = self.container.lg_size();
let array_size = if compact { coupon_count } else { 1 << lg_arr };
let total_size = SET_PREAMBLE_SIZE + (array_size * 4);
let mut bytes = SketchBytes::with_capacity(total_size);
bytes.write_u8(HASH_SET_PREINTS);
bytes.write_u8(SERIAL_VERSION);
bytes.write_u8(HLL_FAMILY_ID);
bytes.write_u8(lg_config_k);
bytes.write_u8(lg_arr as u8);
let mut flags = 0u8;
if compact {
flags |= COMPACT_FLAG_MASK;
}
bytes.write_u8(flags);
bytes.write_u8(0);
bytes.write_u8(encode_mode_byte(CUR_MODE_SET, hll_type as u8));
bytes.write_u32_le(coupon_count as u32);
if compact {
let mut coupons_vec: Vec<u32> = self
.container
.coupons
.iter()
.filter(|&&c| c != 0)
.copied()
.collect();
coupons_vec.sort_unstable();
for coupon in coupons_vec.iter().copied() {
bytes.write_u32_le(coupon);
}
} else {
for coupon in self.container.coupons.iter().copied() {
bytes.write_u32_le(coupon);
}
}
bytes.into_bytes()
}
}