pub mod biop;
mod iter;
#[cfg(test)]
mod proptest;
use crate::*;
use alloc::vec::Vec;
use biop::*;
use core::{cell::Cell, cmp::Ordering, convert::Infallible, fmt, ops};
pub use iter::GenericIter;
pub trait Key: Copy + Default + Ord + 'static {
fn split<C: Chunk>(self) -> (Self, u32);
#[must_use]
fn merge<C: Chunk>(self, bit: u32) -> Self;
}
pub trait Chunk:
Copy
+ Eq
+ ops::Not<Output = Self>
+ ops::BitAnd<Output = Self>
+ ops::BitOr<Output = Self>
+ ops::BitXor<Output = Self>
+ 'static
{
const BITS: u32;
const ZERO: Self;
fn mask(bit: u32) -> Self;
fn count_ones(self) -> u32;
fn highest_bit(self) -> u32;
fn lowest_bit(self) -> u32;
fn remove_lowest_bit(&mut self);
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(
any(test, feature = "serde"),
derive(serde::Serialize, serde::Deserialize)
)]
pub struct FlatBitSet<K = usize, C = u64>(Vec<(K, C)>);
#[must_use = "thunks are lazy and do nothing unless consumed"]
#[derive(Clone)]
pub struct Thunk<I>(I);
pub type Iter<'a, K, C> = GenericIter<IterChunk<'a, K, C>, K, C>;
type IterChunk<'a, K, C> = core::iter::Copied<core::slice::Iter<'a, (K, C)>>;
pub type IntoIter<K, C> = GenericIter<IntoIterChunk<K, C>, K, C>;
type IntoIterChunk<K, C> = alloc::vec::IntoIter<(K, C)>;
fn split_key<K: Key, C: Chunk>(key: K) -> (K, C) {
let (key, bit) = key.split::<C>();
(key, C::mask(bit))
}
impl<K, C> FlatBitSet<K, C> {
#[must_use]
pub const fn new() -> Self {
Self(Vec::new())
}
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
Self(Vec::with_capacity(capacity))
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
#[must_use]
pub fn inner(&self) -> &Vec<(K, C)> {
&self.0
}
pub fn clear(&mut self) {
self.0.clear();
}
}
impl<K: Key, C: Chunk> FlatBitSet<K, C> {
#[must_use]
pub fn count(&self) -> usize {
self.iter().count()
}
#[must_use]
#[inline]
pub fn contains(&self, key: K) -> bool {
let (key, mask) = split_key(key);
let idx = self.0.binary_search_by_key(&key, |c| c.0);
idx.is_ok_and(|idx| self.0[idx].1 & mask != C::ZERO)
}
fn search_mut(&mut self, key: K) -> (K, C, Result<(usize, &mut C), usize>, bool) {
let (key, mask) = split_key(key);
let (res, old) = match self.0.binary_search_by_key(&key, |c| c.0) {
Ok(idx) => {
let old = self.0[idx].1 & mask != C::ZERO;
(Ok((idx, &mut self.0[idx].1)), old)
}
Err(idx) => (Err(idx), false),
};
(key, mask, res, old)
}
pub fn insert(&mut self, key: K) -> bool {
let (key, mask, res, old) = self.search_mut(key);
match res {
Ok((_, chunk)) => *chunk = *chunk | mask,
Err(idx) => self.0.insert(idx, (key, mask)),
}
!old
}
pub fn remove(&mut self, key: K) -> bool {
let (_, mask, res, old) = self.search_mut(key);
if let Ok((idx, chunk)) = res {
*chunk = *chunk & !mask;
if *chunk == C::ZERO {
self.0.remove(idx);
}
}
old
}
pub fn toggle(&mut self, key: K) -> bool {
let (key, mask, res, old) = self.search_mut(key);
match res {
Ok((idx, chunk)) => {
*chunk = *chunk ^ mask;
if *chunk == C::ZERO {
self.0.remove(idx);
}
}
Err(idx) => self.0.insert(idx, (key, mask)),
}
old
}
pub fn set(&mut self, key: K, presence: bool) -> bool {
let (key, mask, res, old) = self.search_mut(key);
match res {
Ok((idx, chunk)) => {
if presence != old {
*chunk = *chunk ^ mask;
}
if *chunk == C::ZERO {
self.0.remove(idx);
}
}
Err(idx) => {
if presence {
self.0.insert(idx, (key, mask));
}
}
}
old
}
pub fn shrink_to_fit(&mut self) {
self.0.shrink_to_fit();
}
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional);
}
#[must_use]
pub fn is_subset(&self, other: &Self) -> bool {
self.try_zip_fold_chunks(
other,
(),
|(), _, _| Err(()),
|(), _, _| Ok(()),
|(), _, a, b| if a & b == a { Ok(()) } else { Err(()) },
)
.is_ok()
}
#[must_use]
pub fn is_superset(&self, other: &Self) -> bool {
other.is_subset(self)
}
#[must_use]
pub fn subset_cmp(&self, other: &Self) -> Option<Ordering> {
let combine = |o1, o2| match (o1, o2) {
(o, Ordering::Equal) | (Ordering::Equal, o) => Ok(o),
_ if o1 == o2 => Ok(o2),
_ => Err(()),
};
let cmp = |a, b| match () {
() if a == b => Ok(Ordering::Equal),
() if a & b == a => Ok(Ordering::Less),
() if a & b == b => Ok(Ordering::Greater),
() => Err(()),
};
self.try_zip_fold_chunks(
other,
Ordering::Equal,
|ord, _, _| combine(ord, Ordering::Greater),
|ord, _, _| combine(ord, Ordering::Less),
|ord, _, a, b| combine(ord, cmp(a, b)?),
)
.ok()
}
#[must_use]
pub fn is_disjoint(&self, other: &Self) -> bool {
self.try_zip_fold_chunks(
other,
(),
|(), _, _| Ok(()),
|(), _, _| Ok(()),
|(), _, a, b| if a & b == C::ZERO { Ok(()) } else { Err(()) },
)
.is_ok()
}
fn try_zip_fold_chunks<T, E>(
&self,
other: &Self,
init: T,
mut first: impl FnMut(T, K, C) -> Result<T, E>,
mut second: impl FnMut(T, K, C) -> Result<T, E>,
mut both: impl FnMut(T, K, C, C) -> Result<T, E>,
) -> Result<T, E> {
let mut it2 = other.chunks().peekable();
let acc = self.chunks().try_fold(init, |acc, (k1, c1)| {
let acc = core::iter::from_fn(|| it2.next_if(|&(k2, _)| k2 < k1))
.try_fold(acc, |acc, (k2, c2)| second(acc, k2, c2))?;
match it2.next_if(|&(k2, _)| k1 == k2) {
Some((_, c2)) => both(acc, k1, c1, c2),
None => first(acc, k1, c1),
}
})?;
it2.try_fold(acc, |acc, (k2, c2)| second(acc, k2, c2))
}
#[allow(clippy::missing_panics_doc)]
pub fn perform_biop_with<O: BiOp<C>>(&mut self, other: &Self) {
let mid = self.0.len();
let cap = match self.try_zip_fold_chunks::<_, Infallible>(
other,
mid,
|n, _, _| Ok(n),
|n, _, c2| Ok(n + usize::from(O::second(c2) != C::ZERO)),
|n, _, _, _| Ok(n),
) {
Ok(cap) => cap,
Err(x) => match x {},
};
self.0.resize(cap, (K::default(), C::ZERO));
let data = Cell::from_mut(&mut self.0[..]).as_slice_of_cells();
let mut writer = data.iter().rev();
let mut write = |k, c| {
if c != C::ZERO {
writer.next().unwrap().set((k, c));
}
};
let mut it2 = other.chunks().rev().peekable();
for kv in data[0..mid].iter().rev() {
let (k1, c1) = kv.get();
while let Some((k2, c2)) = it2.next_if(|&(k2, _)| k2 > k1) {
write(k2, O::second(c2));
}
let cur = it2
.next_if(|&(k2, _)| k1 == k2)
.map_or_else(|| O::first(c1), |(_, c2)| O::both(c1, c2));
write(k1, cur);
}
for (k2, c2) in it2 {
write(k2, O::second(c2));
}
let first = writer.len();
self.0.drain(..first);
}
fn chunks(&self) -> IterChunk<K, C> {
self.0.iter().copied()
}
pub fn iter(&self) -> Iter<K, C> {
Iter::new(self.chunks())
}
pub fn thunk(&self) -> Thunk<IterChunk<K, C>> {
Thunk(self.chunks())
}
}
impl<K, C> Default for FlatBitSet<K, C> {
fn default() -> Self {
Self::new()
}
}
impl<K: Key + fmt::Debug, C: Chunk> fmt::Debug for FlatBitSet<K, C> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_set().entries(self).finish()
}
}
impl<K: Key, C: Chunk> FromIterator<K> for FlatBitSet<K, C> {
fn from_iter<T: IntoIterator<Item = K>>(iter: T) -> Self {
let iter = iter.into_iter();
let mut this = Self::with_capacity(iter.size_hint().0 / C::BITS as usize);
this.extend(iter);
this
}
}
impl<K: Key, C: Chunk> Extend<K> for FlatBitSet<K, C> {
fn extend<T: IntoIterator<Item = K>>(&mut self, iter: T) {
for key in iter {
self.insert(key);
}
}
}
impl<'a, K: Key, C: Chunk> IntoIterator for &'a FlatBitSet<K, C> {
type Item = K;
type IntoIter = Iter<'a, K, C>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl<K: Key, C: Chunk> IntoIterator for FlatBitSet<K, C> {
type Item = K;
type IntoIter = IntoIter<K, C>;
fn into_iter(self) -> Self::IntoIter {
IntoIter::new(self.0.into_iter())
}
}
impl<K: Key, C: Chunk> ops::Index<K> for FlatBitSet<K, C> {
type Output = bool;
fn index(&self, index: K) -> &Self::Output {
if self.contains(index) {
&true
} else {
&false
}
}
}
impl<I, K, C> Thunk<I>
where
I: Iterator<Item = (K, C)>,
K: Key,
C: Chunk,
{
pub fn perform_biop<O, I2>(self, other: Thunk<I2>) -> Thunk<BiOpIter<O, I, I2>>
where
O: BiOp<C>,
I2: Iterator<Item = (K, C)>,
{
Thunk(BiOpIter::new(self.0, other.0))
}
pub fn collect(self) -> FlatBitSet<K, C>
where
I: Clone,
{
let len = self.0.clone().count();
let mut out = FlatBitSet::with_capacity(len);
for (k, c) in self.0 {
out.0.push((k, c));
}
debug_assert_eq!(out.0.len(), len);
out
}
}
impl<I, K, C> IntoIterator for Thunk<I>
where
I: Iterator<Item = (K, C)>,
K: Key,
C: Chunk,
{
type Item = K;
type IntoIter = GenericIter<I, K, C>;
fn into_iter(self) -> Self::IntoIter {
GenericIter::new(self.0)
}
}
macro_rules! impl_biop {
(
$trait:ident { $fn:ident },
$traitasgn:ident { $fnasgn:ident },
$opc:literal,
$ops:literal,
$res:literal,
$mthd:ident,
$mthditer:ident,
$mthdwith:ident,
$biop:ty
) => {
impl<I, K, C> Thunk<I>
where
I: Iterator<Item = (K, C)>,
K: Key,
C: Chunk,
{
#[doc = concat!("惰性计算", $opc)]
#[doc = concat!("可以使用运算符 `t1 ", $ops, " t2`,作用相同,更为简洁。")]
#[doc = concat!("let thunk = s1.thunk().", stringify!($mthd), "(s2.thunk());")]
#[doc = concat!("assert!(thunk.into_iter().eq(", $res, "));")]
#[doc = concat!("可以看看返回新集合的 [`FlatBitSet::", stringify!($mthd), "`]、就地计算的 [`FlatBitSet::", stringify!($mthdwith), "`] 以及返回迭代器的 [`FlatBitSet::", stringify!($mthditer), "`]。")]
pub fn $mthd<I2: Iterator<Item = (K, C)>>(
self,
other: Thunk<I2>,
) -> Thunk<BiOpIter<$biop, I, I2>> {
self.perform_biop(other)
}
}
impl<K: Key, C: Chunk> FlatBitSet<K, C> {
#[doc = concat!("计算", $opc)]
#[doc = concat!("可以使用运算符 `&s1 ", $ops, " &s2`,作用相同,更为简洁。")]
#[doc = concat!("let set = s1.", stringify!($mthd), "(&s2);")]
#[doc = concat!("assert!(set.iter().eq(", $res, "));")]
#[doc = concat!("可以看看就地计算的 [`FlatBitSet::", stringify!($mthdwith), "`]、返回迭代器的 [`FlatBitSet::", stringify!($mthditer), "`] 以及惰性计算的 [`Thunk::", stringify!($mthd), "`](该方法在内部就是用了它)。")]
#[must_use]
pub fn $mthd(&self, other: &Self) -> Self {
self.thunk().$mthd(other.thunk()).collect()
}
#[doc = concat!("就地计算", $opc)]
#[doc = concat!("可以使用运算符 `s1 ", $ops, " &s2`,作用相同,更为简洁。")]
#[doc = concat!("s1.", stringify!($mthdwith), "(&s2);")]
#[doc = concat!("assert!(s1.iter().eq(", $res, "));")]
#[doc = concat!("可以看看返回新集合的 [`FlatBitSet::", stringify!($mthd), "`]、返回迭代器的 [`FlatBitSet::", stringify!($mthditer), "`] 以及惰性计算的 [`Thunk::", stringify!($mthd), "`]。")]
pub fn $mthdwith(&mut self, other: &Self) {
self.perform_biop_with::<$biop>(other);
}
#[doc = concat!($opc, "中元素的迭代器")]
#[doc = concat!("let iter = s1.", stringify!($mthditer), "(&s2);")]
#[doc = concat!("assert!(iter.eq(", $res, "));")]
#[doc = concat!("可以看看返回新集合的 [`FlatBitSet::", stringify!($mthd), "`]、就地计算的 [`FlatBitSet::", stringify!($mthdwith), "`] 以及惰性计算的 [`Thunk::", stringify!($mthd), "`](该方法在内部就是用了它)。")]
pub fn $mthditer<'a>(
&'a self,
other: &'a Self,
) -> GenericIter<BiOpIter<$biop, IterChunk<'a, K, C>, IterChunk<'a, K, C>>, K, C> {
self.thunk().$mthd(other.thunk()).into_iter()
}
}
impl<K: Key, C: Chunk> ops::$traitasgn<&Self> for FlatBitSet<K, C> {
fn $fnasgn(&mut self, rhs: &Self) {
self.$mthdwith(rhs);
}
}
impl<K: Key, C: Chunk> ops::$trait<&Self> for FlatBitSet<K, C> {
type Output = Self;
fn $fn(mut self, rhs: &Self) -> Self::Output {
self.$mthdwith(rhs);
self
}
}
impl<K: Key, C: Chunk> ops::$trait for &FlatBitSet<K, C> {
type Output = FlatBitSet<K, C>;
fn $fn(self, rhs: Self) -> Self::Output {
self.$mthd(rhs)
}
}
impl<I, I2, K, C> ops::$trait<Thunk<I2>> for Thunk<I>
where
I: Iterator<Item = (K, C)>,
I2: Iterator<Item = (K, C)>,
K: Key,
C: Chunk,
{
type Output = Thunk<BiOpIter<$biop, I, I2>>;
fn $fn(self, rhs: Thunk<I2>) -> Self::Output {
self.$mthd(rhs)
}
}
};
}
impl_biop!(
BitAnd { bitand },
BitAndAssign { bitand_assign },
"交集",
"&",
"[2, 3]",
intersection,
intersection_iter,
intersection_with,
AndOp
);
impl_biop!(
BitOr { bitor },
BitOrAssign { bitor_assign },
"并集",
"|",
"[1, 2, 3, 4]",
union,
union_iter,
union_with,
OrOp
);
impl_biop!(
BitXor { bitxor },
BitXorAssign { bitxor_assign },
"对称差",
"^",
"[1, 4]",
symmetric_difference,
symmetric_difference_iter,
symmetric_difference_with,
XorOp
);
impl_biop!(
Sub { sub },
SubAssign { sub_assign },
"差集",
"-",
"[1]",
difference,
difference_iter,
difference_with,
DiffOp
);
macro_rules! impl_key {
($ty:ty) => {
#[allow(
clippy::cast_lossless,
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::cast_sign_loss
)]
impl Key for $ty {
#[inline]
fn merge<C: Chunk>(self, bit: u32) -> Self {
self | bit as Self
}
#[inline]
fn split<C: Chunk>(self) -> (Self, u32) {
let mask = (C::BITS - 1) as Self;
(self & !mask, (self & mask) as u32)
}
}
};
}
macro_rules! impl_chunk {
($ty:ty) => {
impl Chunk for $ty {
const BITS: u32 = Self::BITS;
const ZERO: Self = 0;
#[inline]
fn mask(bit: u32) -> Self {
1 << bit
}
#[inline]
fn count_ones(self) -> u32 {
self.count_ones()
}
#[inline]
fn highest_bit(self) -> u32 {
self.ilog2()
}
#[inline]
fn lowest_bit(self) -> u32 {
self.trailing_zeros()
}
#[inline]
fn remove_lowest_bit(&mut self) {
*self &= *self - 1;
}
}
};
}
impl_key!(u8);
impl_key!(u16);
impl_key!(u32);
impl_key!(u64);
impl_key!(u128);
impl_key!(usize);
impl_key!(i8);
impl_key!(i16);
impl_key!(i32);
impl_key!(i64);
impl_key!(i128);
impl_key!(isize);
impl_chunk!(u32);
impl_chunk!(u64);
impl_chunk!(u128);
impl_chunk!(usize);