use std::collections::HashSet;
use fst::{IntoStreamer, Set as FstSet, SetBuilder, Streamer};
use roaring::RoaringTreemap;
use crate::cc_traits::Len;
pub trait TombstoneSet<Key>: Len + Extend<Key> {
fn contains(&self, key: &Key) -> bool;
fn union_with(&mut self, other: &Self) -> usize;
}
#[derive(Default, Clone, Debug)]
pub struct RoaringTombstoneSet {
bitmap: RoaringTreemap,
}
impl RoaringTombstoneSet {
pub fn new() -> Self {
Self {
bitmap: RoaringTreemap::new(),
}
}
pub fn contains(&self, item: &u64) -> bool {
self.bitmap.contains(*item)
}
pub fn insert(&mut self, item: u64) -> bool {
self.bitmap.insert(item)
}
}
impl TombstoneSet<u64> for RoaringTombstoneSet {
fn contains(&self, key: &u64) -> bool {
self.bitmap.contains(*key)
}
fn union_with(&mut self, other: &Self) -> usize {
let old_len = self.len();
self.bitmap = &self.bitmap | &other.bitmap;
old_len
}
}
impl Extend<u64> for RoaringTombstoneSet {
fn extend<T: IntoIterator<Item = u64>>(&mut self, iter: T) {
self.bitmap.extend(iter);
}
}
impl Len for RoaringTombstoneSet {
fn len(&self) -> usize {
self.bitmap.len() as usize
}
}
impl IntoIterator for RoaringTombstoneSet {
type Item = u64;
type IntoIter = roaring::treemap::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.bitmap.into_iter()
}
}
impl FromIterator<u64> for RoaringTombstoneSet {
fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
Self {
bitmap: RoaringTreemap::from_iter(iter),
}
}
}
#[derive(Clone, Debug)]
pub struct FstTombstoneSet<Item> {
fst: FstSet<Vec<u8>>,
_phantom: std::marker::PhantomData<Item>,
}
impl<Item> Default for FstTombstoneSet<Item> {
fn default() -> Self {
Self::new()
}
}
impl<Item> FstTombstoneSet<Item> {
pub fn new() -> Self {
Self {
fst: FstSet::default(),
_phantom: std::marker::PhantomData,
}
}
pub(crate) fn from_fst(fst: FstSet<Vec<u8>>) -> Self {
Self {
fst,
_phantom: std::marker::PhantomData,
}
}
pub fn contains(&self, item: &[u8]) -> bool {
self.fst.contains(item)
}
pub fn len(&self) -> usize {
self.fst.len()
}
pub fn is_empty(&self) -> bool {
self.fst.is_empty()
}
fn union_internal(&self, other: &Self) -> Self {
let union_stream = self.fst.op().add(&other.fst).union();
let mut builder = SetBuilder::memory();
let mut stream = union_stream.into_stream();
while let Some(key) = stream.next() {
builder
.insert(key)
.expect("union stream keys are sorted, insert should not fail");
}
Self::from_fst(
FstSet::new(
builder
.into_inner()
.expect("memory builder should not fail"),
)
.expect("FST construction from valid builder should not fail"),
)
}
}
pub trait AsBytes {
fn as_bytes(&self) -> &[u8];
}
impl AsBytes for String {
fn as_bytes(&self) -> &[u8] {
self.as_bytes()
}
}
impl TombstoneSet<String> for FstTombstoneSet<String> {
fn contains(&self, key: &String) -> bool {
self.fst.contains(key.as_bytes())
}
fn union_with(&mut self, other: &Self) -> usize {
let old_len = self.len();
*self = self.union_internal(other);
old_len
}
}
impl Len for FstTombstoneSet<String> {
fn len(&self) -> usize {
self.fst.len()
}
}
impl Extend<String> for FstTombstoneSet<String> {
fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
let mut keys: Vec<_> = self.fst.stream().into_strs().unwrap();
keys.extend(iter);
keys.sort();
keys.dedup();
let mut builder = SetBuilder::memory();
for key in keys {
builder
.insert(key)
.expect("keys are sorted, insert should not fail");
}
self.fst = FstSet::new(
builder
.into_inner()
.expect("memory builder should not fail"),
)
.expect("FST construction from valid builder should not fail");
}
}
impl FromIterator<String> for FstTombstoneSet<String> {
fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
let mut keys: Vec<_> = iter.into_iter().collect();
keys.sort();
keys.dedup();
let mut builder = SetBuilder::memory();
for key in keys {
builder
.insert(key)
.expect("keys are sorted, insert should not fail");
}
Self::from_fst(
FstSet::new(
builder
.into_inner()
.expect("memory builder should not fail"),
)
.expect("FST construction from valid builder should not fail"),
)
}
}
impl IntoIterator for FstTombstoneSet<String> {
type Item = String;
type IntoIter = std::vec::IntoIter<String>;
fn into_iter(self) -> Self::IntoIter {
self.fst
.stream()
.into_strs()
.expect("FST contains valid UTF-8 strings")
.into_iter()
}
}
impl<K> TombstoneSet<K> for HashSet<K>
where
K: Eq + std::hash::Hash + Clone,
{
fn contains(&self, key: &K) -> bool {
HashSet::contains(self, key)
}
fn union_with(&mut self, other: &Self) -> usize {
let old_len = self.len();
#[expect(
clippy::disallowed_methods,
reason = "nondeterministic iteration order, fine to insert into set"
)]
for item in other.iter() {
self.insert(item.clone());
}
old_len
}
}