#![deny(missing_docs)]
mod iter;
pub use iter::IntoIter;
const fn num_bits<T>() -> u64 {
std::mem::size_of::<T>() as u64 * 8
}
fn log_2(x: u64) -> u64 {
if x == 0 {
1
} else {
num_bits::<u64>() as u64 - x.leading_zeros() as u64
}
}
#[test]
fn test_log_2() {
assert_eq!(log_2(0), 1);
assert_eq!(log_2(1), 1);
assert_eq!(log_2(7), 3);
assert_eq!(log_2(8), 4);
}
fn compute_array_bits(mx: u64) -> u64 {
if log_2(mx) < 2 {
return 62;
} else if log_2(mx) > 62 {
return 0;
}
let mut bits = num_bits::<u64>() - log_2(mx);
while num_bits::<u64>() - log_2(mx) < bits {
bits += 1;
}
bits
}
#[test]
fn confirm_doctest_bits() {
assert_eq!(51, compute_array_bits(5000));
}
fn split_u64(x: u64, bits: u64) -> (u64, u64) {
if bits > 0 {
(x / bits, (x % bits))
} else {
(x, 0)
}
}
fn unsplit_u64(k: u64, offset: u64, bits: u64) -> u64 {
if bits > 0 {
k * bits + offset
} else {
k
}
}
pub struct SetU64(*mut S);
unsafe impl Send for SetU64 {}
unsafe impl Sync for SetU64 {}
use crate::copyset::impl_set_methods;
impl_set_methods!(SetU64);
#[repr(C)]
#[derive(Debug)]
struct S {
b: Sbeginning,
array: u64,
}
#[repr(C)]
#[derive(Debug)]
struct Sbeginning {
sz: usize,
cap: usize,
bits: u64,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Tiny {
sz: u8,
sz_spent: u8,
bits: usize,
last: usize,
}
fn mask(bits: usize) -> u64 {
(1 << bits) - 1
}
impl Iterator for Tiny {
type Item = u64;
fn next(&mut self) -> Option<u64> {
let bitsplits = BITSPLITS[self.sz as usize];
if self.sz_spent < self.sz {
let nbits = bitsplits[self.sz_spent as usize];
let difference = self.bits & mask(nbits as usize) as usize;
if self.sz_spent == 0 {
self.last = difference;
} else {
self.last = self.last + 1 + difference
}
self.bits = self.bits >> nbits;
self.sz_spent += 1;
Some(self.last as u64)
} else {
None
}
}
fn count(self) -> usize {
self.sz as usize
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.sz as usize, Some(self.sz as usize))
}
fn min(mut self) -> Option<u64> {
self.next()
}
fn max(self) -> Option<u64> {
self.last()
}
}
#[cfg(target_pointer_width = "64")]
static BITSPLITS: [&[u64]; 8] = [
&[],
&[61],
&[40, 21],
&[31, 15, 15],
&[25, 12, 12, 12],
&[21, 10, 10, 10, 10],
&[21, 8, 8, 8, 8, 8],
&[19, 7, 7, 7, 7, 7, 7],
];
#[cfg(target_pointer_width = "32")]
static BITSPLITS: [&[u64]; 8] = [
&[],
&[29],
&[20, 9],
&[15, 7, 7],
&[12, 5, 5, 5],
&[10, 4, 4, 4, 4],
&[9, 4, 4, 4, 4, 4],
&[8, 3, 3, 3, 3, 3, 3],
];
impl Tiny {
#[cfg(test)]
fn debug_me(self, msg: &str) {
println!("{}: {:?} => {:?}", msg, self, self.collect::<Vec<_>>());
}
fn to_usize(self) -> usize {
self.sz as usize | self.bits << 3
}
fn from_usize(x: usize) -> Self {
Tiny {
sz: x as u8 & 7,
bits: x >> 3,
sz_spent: 0,
last: 0,
}
}
fn from_singleton(x: u64) -> Option<Self> {
if log_2(x) > BITSPLITS[1][0] {
None
} else {
Some(Tiny {
sz: 1,
bits: x as usize,
sz_spent: 0,
last: 0,
})
}
}
fn new_sorted_deduped(v: &[u64]) -> Option<Self> {
if v.len() == 0 {
return None;
} else if v.len() > BITSPLITS.len() - 1 {
return None;
}
let sz = v.len() as u8;
let mut last = 0;
let mut offset = 0;
let mut bits: usize = 0;
let bitsplits = BITSPLITS[sz as usize];
for (x, nbits) in v.iter().cloned().zip(bitsplits.iter().cloned()) {
let y = if offset == 0 { x } else { x - last - 1 };
if log_2(y) > nbits {
return None;
}
bits = bits | (y as usize) << offset;
offset += nbits;
last = x;
}
Some(Tiny {
sz,
bits,
sz_spent: 0,
last: 0,
})
}
fn insert(mut self, e: u64) -> Option<Self> {
if e > std::usize::MAX as u64 {
return None;
}
let mut e = e as usize;
let old_bitsplits = BITSPLITS[self.sz as usize];
if let Some(new_bitsplits) = BITSPLITS.get(self.sz as usize + 1) {
let mut new = Tiny {
bits: 0,
sz: self.sz + 1,
last: 0,
sz_spent: 0,
};
let backup = self.clone();
let mut offset = 0;
let mut old_iter = old_bitsplits.iter().cloned();
let mut new_iter = new_bitsplits.iter().cloned();
while let Some(newb) = new_iter.next() {
if let Some(oldb) = old_iter.next() {
let mut n = self.bits & mask(oldb as usize) as usize;
if e == n {
return Some(backup);
} else if log_2(n as u64) > newb {
if e < n {
return None;
}
e -= n + 1;
self.bits = self.bits >> oldb;
for oldb in old_iter {
let n = self.bits & mask(oldb as usize) as usize;
if n == e {
return Some(backup);
}
if e < n {
return None;
}
e -= n + 1;
self.bits = self.bits >> oldb;
}
return None;
} else if e < n {
new.bits = new.bits | (e << offset);
offset += newb;
n = n - e - 1;
let newb = new_iter.next().unwrap();
if log_2(n as u64) > newb {
return None;
}
new.bits = new.bits | (n << offset);
offset += newb;
self.bits = self.bits >> oldb;
for newb in new_iter {
let oldb = old_iter.next().unwrap();
let n = self.bits & mask(oldb as usize) as usize;
if log_2(n as u64) > newb {
return None;
}
new.bits = new.bits | (n << offset);
self.bits = self.bits >> oldb;
offset += newb;
}
return Some(new);
}
e -= n + 1;
new.bits = new.bits | (n << offset);
offset += newb;
self.bits = self.bits >> oldb;
} else {
if log_2(e as u64) > newb {
return None;
}
new.bits = new.bits | (e << offset);
}
}
Some(new)
} else {
if self.clone().any(|x| x == e as u64) {
Some(self)
} else {
None
}
}
}
fn contains(mut self, e: u64) -> bool {
if e > std::usize::MAX as u64 {
return false;
}
let mut e = e as usize;
let bitsplits = BITSPLITS[self.sz as usize];
for b in bitsplits.iter().cloned() {
let n: usize = self.bits & mask(b as usize) as usize;
if e == n {
return true;
} else if e < n {
return false;
}
e -= n + 1;
self.bits = self.bits >> b;
}
false
}
}
#[cfg(test)]
fn test_vec(v: Vec<u64>) {
println!("\ntesting {:?}", v);
assert_eq!(Tiny::new_sorted_deduped(&v).unwrap().collect::<Vec<_>>(), v);
}
#[test]
fn test_tiny() {
assert_eq!(Tiny::new_sorted_deduped(&[]), None);
test_vec(vec![1]);
test_vec(vec![1024]);
test_vec(vec![1, 2]);
test_vec(vec![1, 2, 3]);
test_vec(vec![1, 2, 3, 4, 5]);
test_vec(vec![1, 2, 3, 4, 5, 6]);
test_vec(vec![1, 2, 3, 4, 5, 6, 7]);
}
enum Internal<'a> {
Empty,
Stack(Tiny),
Heap {
s: &'a Sbeginning,
a: &'a [u64],
},
Big {
s: &'a Sbeginning,
a: &'a [u64],
},
Dense {
sz: usize,
a: &'a [u64],
},
}
enum InternalMut<'a> {
Empty,
Stack(Tiny),
Heap {
s: &'a mut Sbeginning,
a: &'a mut [u64],
},
Big {
s: &'a mut Sbeginning,
a: &'a mut [u64],
},
Dense {
sz: &'a mut usize,
a: &'a mut [u64],
},
}
impl Extend<u64> for SetU64 {
fn extend<T: IntoIterator<Item = u64>>(&mut self, iter: T) {
for i in iter.into_iter() {
self.insert(i);
}
}
}
#[cfg(feature = "compactserde")]
impl SetU64 {
fn to_array(&self) -> Vec<u64> {
let mut out = Vec::new();
if self.0 as usize == 0 || self.0 as usize & 7 != 0 {
out.push(self.0 as u64);
} else {
let s = unsafe { &*self.0 };
let b = &s.b;
let a = unsafe { std::slice::from_raw_parts(&s.array as *const u64, b.cap) };
out.push(b.sz as u64);
out.push(b.bits as u64);
out.extend(a);
}
out
}
fn from_array(v: &[u64]) -> SetU64 {
if v.len() > 1 {
let cap = v.len() - 2;
let mut set = SetU64::with_capacity_and_bits(cap, v[1]);
match set.internal_mut() {
InternalMut::Empty => unreachable!(),
InternalMut::Stack(_) => unreachable!(),
InternalMut::Dense { sz, a } => {
*sz = v[0] as usize;
for (i, o) in v[2..].iter().zip(a.iter_mut()) {
*o = *i;
}
}
InternalMut::Heap { s, a } => {
s.sz = v[0] as usize;
s.bits = v[1];
for (i, o) in v[2..].iter().zip(a.iter_mut()) {
*o = *i;
}
}
InternalMut::Big { s, a } => {
s.sz = v[0] as usize;
s.bits = v[1];
for (i, o) in v[2..].iter().zip(a.iter_mut()) {
*o = *i;
}
}
}
set
} else {
SetU64(v[0] as *mut S)
}
}
}
#[cfg(feature = "compactserde")]
#[test]
fn to_from_array() {
use std::iter::FromIterator;
let set = SetU64::from_iter([0]);
let s = set.to_array();
assert_eq!(set, SetU64::from_array(&s));
let set = SetU64::from_iter([]);
let s = set.to_array();
assert_eq!(set, SetU64::from_array(&s));
let set = SetU64::from_iter([u64::MAX, u64::MAX - 100]);
let s = set.to_array();
let newset = SetU64::from_array(&s);
for n in set.iter() {
assert!(set.contains(n));
}
for n in newset.iter() {
assert!(newset.contains(n));
}
println!("set is {set:?}");
println!("newset is {newset:?}");
assert_eq!(set.len(), newset.len());
assert_eq!(set, SetU64::from_array(&s));
let set = SetU64::from_iter(0..10000);
let s = set.to_array();
assert_eq!(set, SetU64::from_array(&s));
}
#[cfg(feature = "serde")]
mod serde {
use crate::SetU64;
use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};
use serde::ser::{Serialize, SerializeSeq, Serializer};
impl Serialize for SetU64 {
#[cfg(feature = "compactserde")]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let a = self.to_array();
let mut seq = serializer.serialize_seq(Some(a.len()))?;
for e in a.into_iter() {
seq.serialize_element(&e)?;
}
seq.end()
}
#[cfg(not(feature = "compactserde"))]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut seq = serializer.serialize_seq(Some(self.len()))?;
for e in self.iter() {
seq.serialize_element(&e)?;
}
seq.end()
}
}
struct SetVisitor;
impl SetVisitor {
fn new() -> Self {
SetVisitor
}
}
impl<'de> Visitor<'de> for SetVisitor {
type Value = SetU64;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a set of usize")
}
#[cfg(feature = "compactserde")]
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
M: SeqAccess<'de>,
{
let mut v = if let Some(cap) = access.size_hint() {
Vec::with_capacity(cap)
} else {
Vec::new()
};
while let Some(elem) = access.next_element()? {
v.push(elem);
}
Ok(SetU64::from_array(&v))
}
#[cfg(not(feature = "compactserde"))]
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
M: SeqAccess<'de>,
{
let mut set = SetU64::new();
while let Some(elem) = access.next_element()? {
set.insert(elem);
}
Ok(set)
}
}
impl<'de> Deserialize<'de> for SetU64 {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_seq(SetVisitor::new())
}
}
#[test]
fn serialize_deserialize() {
use std::iter::FromIterator;
let set = SetU64::from_iter([0]);
let s = serde_json::to_string(&set).unwrap();
assert_eq!(set, serde_json::from_str(&s).unwrap());
let set = SetU64::from_iter([]);
let s = serde_json::to_string(&set).unwrap();
assert_eq!(set, serde_json::from_str(&s).unwrap());
let set = SetU64::from_iter([u64::MAX, u64::MAX - 100]);
let s = serde_json::to_string(&set).unwrap();
assert_eq!(set, serde_json::from_str(&s).unwrap());
let set = SetU64::from_iter(0..10000);
let s = serde_json::to_string(&set).unwrap();
assert_eq!(set, serde_json::from_str(&s).unwrap());
}
}
impl Clone for SetU64 {
fn clone(&self) -> Self {
if self.0 as usize & 7 == 0 && self.0 != std::ptr::null_mut() {
let c = self.capacity();
unsafe {
let ptr = std::alloc::alloc_zeroed(layout_for_capacity(c)) as *mut S;
if ptr.is_null() {
panic!("memory allocation failed");
}
std::ptr::copy_nonoverlapping(
self.0 as *const u8,
ptr as *mut u8,
bytes_for_capacity(c),
);
SetU64(ptr)
}
} else {
SetU64(self.0)
}
}
}
impl SetU64 {
pub fn with_capacity_of(other: &Self) -> Self {
if other.0 as usize & 7 == 0 && other.0 != std::ptr::null_mut() {
let c = other.capacity();
unsafe {
let ptr = std::alloc::alloc_zeroed(layout_for_capacity(c)) as *mut S;
if ptr.is_null() {
panic!("memory allocation failed");
}
(*ptr).b.cap = (*other.0).b.cap;
(*ptr).b.bits = (*other.0).b.bits;
SetU64(ptr)
}
} else {
SetU64::new()
}
}
}
#[test]
fn just_clone() {
let mut x = SetU64::with_capacity_and_max(100, 1000000);
x.insert(100);
x.insert(1000);
let y = x.clone();
assert_eq!(x.len(), y.len());
assert_eq!(x.len(), x.into_iter().count());
assert_eq!(y.len(), y.clone().into_iter().count());
}
impl SetU64 {
#[inline]
pub fn len(&self) -> usize {
match self.internal() {
Internal::Empty => 0,
Internal::Stack(t) => t.sz as usize,
Internal::Heap { s, .. } => s.sz,
Internal::Big { s, .. } => s.sz,
Internal::Dense { sz, .. } => sz,
}
}
#[inline]
pub fn capacity(&self) -> usize {
match self.internal() {
Internal::Empty => 0,
Internal::Stack(_) => 0,
Internal::Heap { a, .. } => a.len(),
Internal::Big { a, .. } => a.len(),
Internal::Dense { a, .. } => a.len(),
}
}
pub fn debug_me(&self, msg: &str) {
match self.internal() {
Internal::Empty => println!("empty set: {}", msg),
Internal::Stack(t) => println!("{}: stack {:?} => {:?}", msg, t, t.collect::<Vec<_>>()),
Internal::Heap { s, a } => {
println!("{}: heap {:?}", msg, s);
for (i, x) in a.iter().cloned().enumerate() {
println!(
" {} (key {} pov {}): {:0b} ({})",
(x >> s.bits) * s.bits,
x >> s.bits,
p_poverty(x >> s.bits, i, a.len()),
x & mask(s.bits as usize),
x
);
}
println!(" => {:?}", self.iter().collect::<Vec<_>>());
}
Internal::Big { s, a } => {
println!("{}: big {:?}\n {:?}", msg, s, a);
let v: Vec<_> = a.iter().cloned().map(|x| x % a.len() as u64).collect();
println!(" >>>{:?}", v);
}
Internal::Dense { sz, a } => {
println!(
"{}: dense {:?}\n {:?}\n => {:?}",
msg,
sz,
a,
self.iter().collect::<Vec<u64>>()
);
println!(" foo {:?}", self.iter());
}
}
}
#[inline]
pub fn mem_used(&self) -> usize {
match self.internal() {
Internal::Empty => std::mem::size_of::<Self>(),
Internal::Stack(_) => std::mem::size_of::<Self>(),
Internal::Heap { s, .. } => std::mem::size_of::<Self>() + s.cap * 8 - 8,
Internal::Dense { a, .. } => std::mem::size_of::<Self>() + a.len() * 8 - 8,
Internal::Big { s, .. } => std::mem::size_of::<Self>() + s.cap * 8 - 8,
}
}
fn dense_with_max(mx: u64) -> SetU64 {
let cap = 1 + mx / 64 + mx / 256;
unsafe {
let ptr = std::alloc::alloc_zeroed(layout_for_capacity(cap as usize)) as *mut S;
if ptr.is_null() {
panic!("memory allocation failed");
}
let x = SetU64(ptr);
(*x.0).b.cap = cap as usize;
(*x.0).b.bits = 64;
x
}
}
pub fn with_capacity_and_max(cap: usize, mx: u64) -> SetU64 {
if cap as u64 > mx >> 7 {
SetU64::dense_with_max(mx)
} else {
SetU64::with_capacity_and_bits(cap, compute_array_bits(mx))
}
}
pub fn with_capacity_and_bits(cap: usize, bits: u64) -> SetU64 {
if cap > 0 {
unsafe {
let ptr = std::alloc::alloc_zeroed(layout_for_capacity(cap)) as *mut S;
if ptr.is_null() {
panic!("memory allocation failed");
}
let x = SetU64(ptr);
(*x.0).b.cap = cap;
(*x.0).b.bits = if bits == 0 {
let mut b = 0;
while b <= 64 {
b = crate::rand::rand64(cap, bits);
}
b
} else {
bits
};
x
}
} else {
SetU64(0 as *mut S)
}
}
#[inline]
pub const fn new() -> Self {
SetU64(0 as *mut S)
}
pub fn insert(&mut self, e: u64) -> bool {
match self.internal_mut() {
InternalMut::Empty => {
if let Some(t) = Tiny::from_singleton(e) {
*self = SetU64(t.to_usize() as *mut S);
return true;
}
*self = Self::with_capacity_and_max(1, e);
}
InternalMut::Stack(t) => {
if let Some(newt) = t.insert(e) {
*self = SetU64(newt.to_usize() as *mut S);
return newt.sz != t.sz;
}
let mx = t.max().unwrap();
let mx = if e > mx { e } else { mx };
*self = Self::with_capacity_and_max(t.sz as usize + 1, mx);
for x in t {
self.insert(x);
}
self.insert(e);
return true;
}
_ => (),
}
match self.internal_mut() {
InternalMut::Empty => unreachable!(),
InternalMut::Stack(_) => unreachable!(),
InternalMut::Dense { sz, a } => {
let key = (e >> 6) as usize;
if let Some(bits) = a.get_mut(key) {
let whichbit = 1 << (e & 63);
let present = *bits & whichbit != 0;
*bits = *bits | whichbit;
if !present {
*sz = *sz + 1;
}
!present
} else {
if key > 128 * (*sz as usize) {
let cap = 2 * (*sz + 1);
let mut new = SetU64::with_capacity_and_bits(cap as usize, 0);
for x in self.iter() {
new.insert(x);
}
new.insert(e);
*self = new;
} else {
let mut new = SetU64::with_capacity_and_bits(1 + key + key / 4, 64);
match new.internal_mut() {
InternalMut::Empty => unreachable!(),
InternalMut::Stack(_) => unreachable!(),
InternalMut::Big { .. } => unreachable!(),
InternalMut::Heap { .. } => unreachable!(),
InternalMut::Dense { sz: newsz, a: na } => {
for (i, v) in a.iter().cloned().enumerate() {
na[i] = v;
}
na[key] = 1 << (e & 63);
*newsz = *sz + 1;
}
}
*self = new;
}
true
}
}
InternalMut::Heap { s, a } => {
if compute_array_bits(e) < s.bits {
let mut new = Self::with_capacity_and_bits(
s.cap + 1 + 2 * (crate::rand::rand_usize(s.cap, s.bits) % s.cap),
compute_array_bits(e),
);
for d in self.iter() {
new.insert(d);
}
new.insert(e);
*self = new;
return true;
}
let (key, offset) = split_u64(e, s.bits);
match p_lookfor(key, a, s.bits) {
LookedUp::KeyFound(idx) => {
if a[idx] & (1 << offset) != 0 {
return false;
} else {
a[idx] = a[idx] | (1 << offset);
s.sz += 1;
return true;
}
}
LookedUp::EmptySpot(idx) => {
a[idx] = key << s.bits | 1 << offset;
s.sz += 1;
return true;
}
LookedUp::NeedInsert => {}
}
if a.iter().cloned().any(|x| x == 0) {
let idx = p_insert(key, a, s.bits);
a[idx] = (key << s.bits) | (1 << offset);
s.sz += 1;
return true;
}
let mx = a
.iter()
.cloned()
.map(|x| (x >> s.bits) * s.bits + s.bits)
.max()
.unwrap();
let mx = if e > mx { e } else { mx };
if s.cap as u64 > mx >> 6 {
let mut new = Self::dense_with_max(mx);
for x in self.iter() {
new.insert(x);
}
new.insert(e);
*self = new;
} else {
let newcap: usize = s.cap + 1 + (crate::rand::rand_usize(s.cap, s.bits) % s.cap);
let mut new = Self::with_capacity_and_bits(newcap, s.bits);
for v in self.iter() {
new.insert(v);
}
new.insert(e);
*self = new;
}
true
}
InternalMut::Big { s, a } => {
if e == s.bits {
let had_zero = p_remove(s.bits, a, 0);
loop {
let i: u64 = crate::rand::rand64(s.cap, s.bits);
if i > 64 && !a.iter().any(|&v| v == i) {
s.bits = i;
break;
}
}
if had_zero {
a[p_insert(s.bits, a, 0)] = s.bits;
}
}
let e = if e == 0 { s.bits } else { e };
match p_lookfor(e, a, 0) {
LookedUp::KeyFound(_) => {
return false;
}
LookedUp::EmptySpot(idx) => {
a[idx] = e;
s.sz += 1;
return true;
}
LookedUp::NeedInsert => (),
}
if a.iter().cloned().any(|x| x == 0) {
a[p_insert(e, a, 0)] = e;
s.sz += 1;
return true;
}
let newcap: usize = s.cap + 1 + (crate::rand::rand_usize(s.cap, s.bits) % (2 * s.cap));
let mut new = Self::with_capacity_and_bits(newcap, s.bits);
match new.internal_mut() {
InternalMut::Empty => unreachable!(),
InternalMut::Stack(_) => unreachable!(),
InternalMut::Dense { .. } => unreachable!(),
InternalMut::Heap { .. } => unreachable!(),
InternalMut::Big { s: ns, a: na } => {
for v in a.iter().cloned().filter(|&x| x != 0) {
na[p_insert(v, na, 0)] = v;
}
na[p_insert(e, na, 0)] = e;
ns.sz = s.sz + 1;
}
}
*self = new;
true
}
}
}
pub fn remove(&mut self, e: u64) -> bool {
match self.internal_mut() {
InternalMut::Empty => false,
InternalMut::Stack(t) => {
if t.clone().any(|x| x == e) {
let sz = t.sz - 1;
if sz == 0 {
*self = SetU64(0 as *mut S);
} else {
*self = t.filter(|&x| x != e).collect();
}
true
} else {
false
}
}
InternalMut::Dense { sz, a } => {
let key = e >> 6;
if let Some(bits) = a.get_mut(key as usize) {
let whichbit = 1 << (e & 63);
let present = *bits & whichbit != 0;
*bits = *bits & !whichbit;
if present {
*sz = *sz - 1;
}
present
} else {
false
}
}
InternalMut::Heap { s, a } => {
if compute_array_bits(e) < s.bits {
return false;
}
let (key, offset) = split_u64(e, s.bits);
if let LookedUp::KeyFound(idx) = p_lookfor(key, a, s.bits) {
if a[idx] & (1 << offset) != 0 {
let newa = a[idx] & !(1 << offset);
s.sz -= 1;
if newa == key << s.bits {
p_remove(key, a, s.bits);
} else {
a[idx] = newa;
}
true
} else {
false
}
} else {
false
}
}
InternalMut::Big { s, a } => {
if e == s.bits {
return false;
}
let e = if e == 0 { s.bits } else { e };
let had_e = p_remove(e, a, 0);
if had_e {
s.sz -= 1;
}
had_e
}
}
}
pub fn contains(&self, e: u64) -> bool {
match self.internal() {
Internal::Empty => false,
Internal::Stack(t) => t.contains(e), Internal::Dense { a, .. } => {
let key = e >> 6;
if let Some(bits) = a.get(key as usize) {
bits & (1 << (e & 63)) != 0
} else {
false
}
}
Internal::Heap { s, a } => {
if compute_array_bits(e) < s.bits {
return false;
}
let (key, offset) = split_u64(e, s.bits);
if let LookedUp::KeyFound(idx) = p_lookfor(key, a, s.bits) {
a[idx] & (1 << offset) != 0
} else {
false
}
}
Internal::Big { s, a } => {
if e == s.bits {
return false;
}
let e = if e == 0 { s.bits } else { e };
p_lookfor(e, a, 0).key_found()
}
}
}
#[inline]
pub fn drain<'a>(&mut self) -> impl Iterator<Item = u64> + 'static {
std::mem::replace(self, SetU64::new()).into_iter()
}
#[inline]
fn internal<'a>(&'a self) -> Internal<'a> {
if self.0 as usize == 0 {
Internal::Empty
} else if self.0 as usize & 7 != 0 {
Internal::Stack(Tiny::from_usize(self.0 as usize))
} else {
let s = unsafe { &*self.0 };
let b = &s.b;
let array = unsafe {
self.0.cast::<u64>().offset(
(&s.array as *const u64)
.offset_from(self.0.cast()),
)
};
let a = unsafe { std::slice::from_raw_parts(array, b.cap as usize) };
if b.bits == 0 || b.bits > 64 {
Internal::Big { s: b, a }
} else if b.bits == 64 {
Internal::Dense { sz: b.sz, a }
} else {
Internal::Heap { s: b, a }
}
}
}
fn internal_mut<'a>(&'a mut self) -> InternalMut<'a> {
if self.0 as usize == 0 {
InternalMut::Empty
} else if self.0 as usize & 7 != 0 {
InternalMut::Stack(Tiny::from_usize(self.0 as usize))
} else {
let s = unsafe { &mut *self.0 };
let b = &mut s.b;
let array = unsafe {
self.0.cast::<u64>().offset(
(&mut s.array as *mut u64)
.offset_from(self.0.cast()),
)
};
let a = unsafe { std::slice::from_raw_parts_mut(array, b.cap as usize) };
if b.bits == 0 || b.bits > 64 {
InternalMut::Big { s: b, a }
} else if b.bits == 64 {
InternalMut::Dense { sz: &mut b.sz, a }
} else {
InternalMut::Heap { s: b, a }
}
}
}
}
#[cfg(test)]
impl crate::copyset::CopySet for SetU64 {
type Item = u64;
type Iter = IntoIter;
fn ins(&mut self, e: u64) -> bool {
self.insert(e)
}
fn rem(&mut self, e: u64) -> bool {
self.remove(e)
}
fn con(&self, e: u64) -> bool {
self.contains(e)
}
fn vec(&self) -> Vec<u64> {
self.iter().collect()
}
fn ln(&self) -> usize {
self.len()
}
fn it(self) -> Self::Iter {
self.into_iter()
}
}
impl Default for SetU64 {
fn default() -> Self {
SetU64(0 as *mut S)
}
}
impl std::iter::FromIterator<u64> for SetU64 {
fn from_iter<T>(iter: T) -> Self
where
T: IntoIterator<Item = u64>,
{
let mut v: Vec<_> = iter.into_iter().collect();
v.sort();
v.dedup();
if let Some(mx) = v.iter().cloned().max() {
if let Some(t) = Tiny::new_sorted_deduped(&v) {
SetU64(t.to_usize() as *mut S)
} else {
if v.len() as u64 > mx >> 4 {
let mut s = SetU64::with_capacity_and_max(v.len(), mx);
for value in v.into_iter() {
s.insert(value);
}
return s;
}
let bits = compute_array_bits(mx);
if bits == 0 {
let mut s = SetU64::with_capacity_and_bits(v.len(), bits);
for value in v.into_iter() {
s.insert(value);
}
s
} else {
let mut keys: Vec<_> = v.iter().map(|&x| x / bits).collect();
keys.sort();
keys.dedup();
let sz = (keys.len() + 1) * 11 / 10;
let mut s = SetU64::with_capacity_and_bits(sz, bits);
for value in v.into_iter() {
s.insert(value);
}
s
}
}
} else {
SetU64(0 as *mut S)
}
}
}
#[cfg(test)]
fn test_a_collect(v: Vec<u64>) {
let s: SetU64 = v.iter().cloned().collect();
let vv: Vec<_> = s.iter().collect();
let ss: SetU64 = vv.iter().cloned().collect();
let vvv: Vec<_> = ss.iter().collect();
assert_eq!(vv, vvv);
}
#[test]
#[should_panic]
fn test_alloc_failure() {
SetU64::with_capacity_and_bits(usize::MAX / 8 - 2, 0);
}
#[test]
fn test_collect() {
test_a_collect(vec![]);
test_a_collect(vec![0]);
test_a_collect(vec![0, 1 << 60]);
test_a_collect(vec![0, 1 << 30, 1 << 60]);
test_a_collect((0..1024).collect());
}
fn bytes_for_capacity(sz: usize) -> usize {
sz * 8 + std::mem::size_of::<S>() - 8
}
fn layout_for_capacity(sz: usize) -> std::alloc::Layout {
let size = bytes_for_capacity(sz);
if size >= usize::MAX / 2 {
panic!("tinyset size is too large: {}", sz);
}
unsafe { std::alloc::Layout::from_size_align_unchecked(size, 8) }
}
impl Drop for SetU64 {
fn drop(&mut self) {
if self.0 as usize > 0 {
let c = self.capacity();
if c == 0 {
} else {
unsafe {
std::alloc::dealloc(self.0 as *mut u8, layout_for_capacity(c));
}
}
}
}
}
#[cfg(test)]
impl heapsize::HeapSizeOf for SetU64 {
fn heap_size_of_children(&self) -> usize {
match self.internal() {
Internal::Empty => 0,
Internal::Stack(_) => 0,
Internal::Heap { a, .. } => std::mem::size_of::<S>() - 8 + a.len() * 8,
Internal::Big { a, .. } => std::mem::size_of::<S>() - 8 + a.len() * 8,
Internal::Dense { a, .. } => std::mem::size_of::<S>() - 8 + a.len() * 8,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use heapsize::HeapSizeOf;
fn check_set(elems: &[u64]) {
println!("\n\ncheck_set {:?}\n", elems);
let mut s = SetU64::default();
s.debug_me("default set");
let mut count = 0;
for x in elems.iter().cloned() {
let was_here = s.contains(x);
s.debug_me(&format!("\n\n\nabout to insert {}", x));
let changed_something = s.insert(x);
s.debug_me(&format!("\n\nafter inserting {}", x));
if changed_something {
count += 1;
println!(" {} is new now count {}", x, count);
}
assert_eq!(!was_here, changed_something);
s.debug_me(&format!("after inserting {} length is {}", x, s.len()));
println!("what is this? count {} does it have {}?", count, x);
assert!(s.contains(x));
assert_eq!(s.len(), count);
assert_eq!(s.iter().count(), count);
}
assert!(elems.len() >= s.len());
assert_eq!(elems.iter().cloned().min(), s.iter().min());
println!("set {:?} with length {}", elems, s.len());
for x in s.iter() {
println!(" {}", x);
}
s.debug_me("finally");
assert_eq!(s.iter().count(), s.len());
for x in s.iter() {
println!("looking for {}", x);
assert!(elems.contains(&x));
}
for x in s.iter() {
println!("found {}", x);
assert!(elems.contains(&x));
}
for x in elems.iter().cloned() {
println!("YYYY looking for {}", x);
assert!(s.contains(x));
}
for x in elems.iter().cloned() {
println!("removing {}", x);
s.remove(x);
s.debug_me(" after remove");
}
for x in elems.iter().cloned() {
println!("XXXX looking for {}", x);
assert!(!s.contains(x));
}
s.debug_me("after everything was removed");
assert_eq!(s.len(), 0);
check_size(elems);
}
#[test]
fn check_sets() {
check_set(&[
2020521336280004635,
17919264261434241137,
2238430514865874295,
6993057942733118921,
151320868361192487,
]);
check_set(&[
1121917459475225854,
2080724155257001326,
4615424731220156355,
0,
]);
check_set(&[
12891372448885225674,
7003397808690412416,
129282323776365774,
9248739364838008708,
]);
check_set(&[]);
check_set(&[10]);
check_set(&[1024]);
check_set(&[1]);
check_set(&[0]);
check_set(&[0, 1]);
check_set(&[0, 1]);
check_set(&[0, 1, 2, 3]);
check_set(&[0, 1, 0, 2, 3]);
check_set(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
check_set(&[0, 1, 2, 3, 4, 5, 6, 7 << 30, 8, 9, 10]);
check_set(&[0, 2097153, 2, 2305843009213693952]);
check_set(&[0, 1024]);
check_set(&[0, 1024, 1 << 61]);
check_set(&[0, 1024, 1 << 63]);
}
fn check_tiny_from_vec(slice: Vec<u64>) {
println!("\n\n\ncheck_tiny_from_vec({:?})", slice);
let mut t = if let Some(t) = Tiny::from_singleton(slice[0]) {
t
} else {
return; };
t.debug_me("to start with");
let mut count = 1;
let mut included = Vec::new();
for x in slice.iter().cloned() {
t.debug_me(&format!("starting in on {}", x));
assert_eq!(t.clone().any(|e| e == x), t.contains(x));
let already = t.clone().any(|e| e == x);
let next = t.insert(x);
if let Some(tt) = next {
tt.debug_me(" inserting gives");
assert_eq!(tt.sz == t.sz, already);
if tt.sz != t.sz {
count += 1;
included.push(x);
}
t = tt;
t.debug_me("hello");
assert!(t.clone().any(|e| e == x));
assert!(t.insert(x).is_some()); t.debug_me("hello again");
assert_eq!(t.sz, count);
for xx in t.clone() {
assert!(t.contains(xx));
assert!(t.clone().any(|x| x == xx));
}
} else {
assert!(!already);
}
t.debug_me("at end of loop");
}
for x in included.iter().cloned() {
assert!(t.contains(x));
}
}
#[test]
fn check_specific_tinies() {
check_tiny_from_vec(vec![49, 50, 1]);
check_tiny_from_vec(vec![1]);
check_tiny_from_vec(vec![1, 2]);
check_tiny_from_vec(vec![1000000, 1000030]);
check_tiny_from_vec(vec![3000027656, 3000030504]);
check_tiny_from_vec(vec![3127827656, 3125730504]);
check_tiny_from_vec(vec![3127827656, 3125730504, 1]);
check_tiny_from_vec(vec![2, 3, 143, 251, 1, 130, 251]);
check_tiny_from_vec(vec![1, 130, 131, 132, 133, 251]);
}
use proptest::prelude::*;
proptest! {
#[test]
fn copycheck_random_sets(slice in prop::collection::vec(1u64..5, 1usize..10)) {
crate::copyset::check_set::<SetU64>(&slice);
}
#[test]
fn copycheck_medium_sets(slice in prop::collection::vec(1u64..255, 1usize..100)) {
crate::copyset::check_set::<SetU64>(&slice);
}
#[test]
fn copycheck_big_sets(slice: Vec<u64>) {
crate::copyset::check_set::<SetU64>(&slice);
}
}
proptest! {
#[test]
fn check_random_sets(slice in prop::collection::vec(1u64..5, 1usize..10)) {
check_set(&slice);
}
#[test]
fn check_medium_sets(slice in prop::collection::vec(1u64..255, 1usize..100)) {
check_set(&slice);
}
#[test]
fn check_big_sets(slice: Vec<u64>) {
check_set(&slice);
}
#[test]
fn check_tiny(slice in prop::collection::vec(1u64..0xffffffff, 1usize..9)) {
check_tiny_from_vec(slice);
}
}
#[test]
fn check_specific_sets() {
check_set(&[
2847318633310315892,
63058418965769059,
2042910419467651321,
1999840121589118486,
16041957357413958548,
3644150915196633528,
11391567487966916668,
2789376712080388913,
8889702475440805467,
16888113214698725429,
1249634136756270040,
15461625332902556004,
8159161795026448273,
12009229139422836646,
15912096166435473807,
]);
}
fn total_size_of<T: HeapSizeOf>(x: &T) -> usize {
std::mem::size_of::<T>() + x.heap_size_of_children()
}
fn check_size(v: &[u64]) {
let s: SetU64 = v.iter().cloned().collect();
let hs: std::collections::HashSet<_> = v.iter().cloned().collect();
println!("setu64: {}", total_size_of(&s));
println!("hashst: {}", total_size_of(&hs));
assert!(total_size_of(&s) < total_size_of(&hs));
}
#[cfg(test)]
fn collect_size_is(v: &[u64], sz: usize) {
let s: SetU64 = v.iter().cloned().collect();
println!("collect_size_is {:?} == {} =? {}", v, total_size_of(&s), sz);
println!(" num elements = {}", s.len());
assert_eq!(total_size_of(&s), sz);
}
#[cfg(test)]
fn incremental_size_le(v: &[u64], sz: usize) {
for _ in 1..1000 {
let sc: SetU64 = v.iter().cloned().collect();
if total_size_of(&sc) > sz {
println!("collect_size {:?} = {} > {}", v, total_size_of(&sc), sz);
}
assert!(total_size_of(&sc) <= sz);
let mut s = SetU64::new();
let mut vv = Vec::new();
for x in v.iter().cloned() {
s.insert(x);
vv.push(x);
if total_size_of(&s) > sz {
println!("incremental_size {:?} = {} > {}", vv, total_size_of(&s), sz);
}
assert!(total_size_of(&s) <= sz);
}
}
}
#[cfg(target_pointer_width = "64")]
#[test]
fn test_size() {
assert_eq!(std::mem::size_of::<S>(), 32);
collect_size_is(&[0], 8);
incremental_size_le(&[0], 8);
collect_size_is(&[], 8);
incremental_size_le(&[], 8);
collect_size_is(&[3000], 8);
collect_size_is(&[1, 2, 3, 4, 5, 6], 8);
incremental_size_le(&[1, 2, 3, 4, 5, 6], 8);
collect_size_is(&[1000, 1002, 1004, 1006, 1008], 8);
incremental_size_le(&[1000, 1002, 1004, 1006, 1008], 8);
collect_size_is(&[255, 260, 265, 270, 275, 280, 285], 8);
collect_size_is(&[1000, 1002, 1004, 1006, 1008, 1009, 1010], 8);
incremental_size_le(&(0..7).collect::<Vec<_>>(), 8);
incremental_size_le(&(10..10 + 7).collect::<Vec<_>>(), 8);
incremental_size_le(&(100..100 + 7).collect::<Vec<_>>(), 8);
incremental_size_le(&(1000..1000 + 7).collect::<Vec<_>>(), 8);
incremental_size_le(&(10000..10000 + 7).collect::<Vec<_>>(), 8);
incremental_size_le(&(100000..100000 + 7).collect::<Vec<_>>(), 8);
incremental_size_le(&(1..30).collect::<Vec<_>>(), 40);
incremental_size_le(&(1..60).collect::<Vec<_>>(), 40);
incremental_size_le(&(1..160).collect::<Vec<_>>(), 88);
incremental_size_le(&(0..100).map(|x| x * 10).collect::<Vec<_>>(), 400);
}
#[cfg(target_pointer_width = "32")]
#[test]
fn test_size() {
assert_eq!(std::mem::size_of::<S>(), 24);
collect_size_is(&[0], 4);
incremental_size_le(&[0], 4);
collect_size_is(&[], 4);
incremental_size_le(&[], 4);
collect_size_is(&[3000], 4);
collect_size_is(&[1, 2, 3, 4, 5, 6], 4);
incremental_size_le(&[1, 2, 3, 4, 5, 6], 4);
collect_size_is(&[1000, 1002, 1004, 1006, 1008], 4);
incremental_size_le(&[1000, 1002, 1004, 1006, 1008], 4);
collect_size_is(&[255, 260, 265, 270, 275, 280, 285], 4);
incremental_size_le(&(1..30).collect::<Vec<_>>(), 32);
incremental_size_le(&(1..60).collect::<Vec<_>>(), 32);
incremental_size_le(&(1..160).collect::<Vec<_>>(), 84);
incremental_size_le(&(0..100).map(|x| x * 10).collect::<Vec<_>>(), 400);
}
fn check_set_primitives(elems: &[u64]) {
let sz = elems.len();
println!("\n\nprimitives: {:?}\n", elems);
let mut a = vec![0; sz];
for x in elems.iter().cloned() {
if p_lookfor(x, &a, 0).key_found() {
println!("we already have {}", x);
} else {
let i = p_insert(x, &mut a, 0);
a[i] = x;
}
println!(" after inserting {} we have {:?}", x, a);
assert!(p_lookfor(x, &a, 0).key_found());
}
for x in elems.iter().cloned() {
println!("looking for {}", x);
assert!(p_lookfor(x, &a, 0).key_found());
}
for x in elems.iter().cloned() {
println!("removing {}", x);
p_remove(x, &mut a, 0);
println!(" after removing {} we have {:?}", x, a);
}
for x in elems.iter().cloned() {
println!("XXXX looking for {}", x);
assert!(p_lookfor(x, &a, 0).empty_spot());
}
println!("after everything was removed: {:?}", a);
for x in a.iter().cloned() {
assert_eq!(x, 0);
}
}
proptest! {
#[test]
fn check_random_primitives(slice in prop::collection::vec(1u64..50, 1usize..100)) {
check_set_primitives(&slice);
}
}
}
fn p_poverty(k: u64, idx: usize, n: usize) -> usize {
((idx % n) + n - (k % n as u64) as usize) % n
}
fn p_insert(k: u64, a: &mut [u64], offset: u64) -> usize {
let n = a.len();
for pov in 0..n {
let ii = (((k % n as u64) + pov as u64) % n as u64) as usize;
let ki = a[ii] >> offset;
let pov_ki = p_poverty(ki, ii, n);
if a[ii] == 0 || ki == k {
return ii;
} else if pov_ki < pov {
let stolen = ii;
let mut displaced = a[ii];
let mut pov_displaced = pov_ki;
a[stolen] = 0;
for j in 1..n {
pov_displaced += 1;
let jj = (stolen + j) % n;
let kj = a[jj] >> offset;
let pov_kj = p_poverty(kj, jj, n);
if a[jj] == 0 {
a[jj] = displaced;
return stolen;
}
if pov_kj < pov_displaced {
std::mem::swap(&mut a[jj], &mut displaced);
pov_displaced = pov_kj;
}
}
panic!("p_insert was called when there was no room!")
}
}
unreachable!()
}
#[test]
fn test_insert() {
let mut a = [0, 0, 0, 0];
assert_eq!(2, p_insert(2, &mut a, 0));
assert_eq!(&a, &[0, 0, 0, 0]);
for i in 0..10 {
assert_eq!(0, a[p_insert(i, &mut a, 0)]);
}
let mut a = [0, 0, 6, 0];
assert_eq!(3, p_insert(2, &mut a, 0));
assert_eq!(&a, &[0, 0, 6, 0]);
for i in 0..10 {
assert!([0, i].contains(&a[p_insert(i, &mut a, 0)]));
}
let mut a = [0, 0, 6, 3];
assert_eq!(3, p_insert(2, &mut a, 0));
assert_eq!(&a, &[3, 0, 6, 0]);
for i in 0..10 {
assert!([0, i].contains(&a[p_insert(i, &mut a, 0)]));
}
}
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
enum LookedUp {
EmptySpot(usize),
KeyFound(usize),
NeedInsert,
}
impl LookedUp {
fn key_found(self) -> bool {
if let LookedUp::KeyFound(_) = self {
true
} else {
false
}
}
#[cfg(test)]
fn empty_spot(self) -> bool {
if let LookedUp::EmptySpot(_) = self {
true
} else {
false
}
}
#[cfg(test)]
fn unwrap(self) -> usize {
if let LookedUp::KeyFound(idx) = self {
idx
} else {
panic!("unwrap called on {:?}", self)
}
}
}
fn p_lookfor(k: u64, a: &[u64], offset: u64) -> LookedUp {
let n = a.len();
for pov in 0..n {
let ii = (((k % n as u64) + pov as u64) % n as u64) as usize;
if a[ii] == 0 {
return LookedUp::EmptySpot(ii);
}
let ki = a[ii] >> offset;
let pov_ki = p_poverty(ki, ii, n);
if ki == k {
return LookedUp::KeyFound(ii);
} else if pov_ki < pov {
return LookedUp::NeedInsert;
}
}
LookedUp::NeedInsert
}
#[test]
fn test_lookfor() {
assert_eq!(LookedUp::NeedInsert, p_lookfor(5, &[3, 1, 2], 0));
assert_eq!(LookedUp::NeedInsert, p_lookfor(5, &[3, 0, 2], 0));
assert_eq!(LookedUp::KeyFound(3), p_lookfor(7, &[0, 0, 0, 7], 0));
}
fn p_remove(k: u64, a: &mut [u64], offset: u64) -> bool {
let n = a.len();
for i in 0..n {
let ii = (((k % n as u64) + i as u64) % n as u64) as usize;
if a[ii] == 0 {
return false;
}
let ki = a[ii] >> offset;
let iki = (((ii + n) as u64 - (ki % n as u64)) % n as u64) as usize;
if i > iki {
return false;
} else if ki == k {
a[ii] = 0;
let mut previous = ii;
for j in 1..n {
let jj = (ii + j) % n;
let kj = a[jj] >> offset;
let pov_kj = p_poverty(kj, jj, n);
if a[jj] == 0 || pov_kj == 0 {
return true;
}
a[previous] = a[jj];
a[jj] = 0;
previous = jj;
}
return true;
}
}
false
}
#[cfg(test)]
fn test_insert_remove(x: u64, a: &mut [u64]) {
println!("test_insert_remove({}, {:?})", x, a);
let v: Vec<u64> = a.iter().cloned().collect();
assert!(!p_remove(x, a, 0));
assert!(!a.contains(&x)); assert!(!p_lookfor(x, a, 0).key_found());
a[p_insert(x, a, 0)] = x;
assert!(a.contains(&x));
println!(" after insertion of {} a is {:?}", x, a);
assert!(p_lookfor(x, a, 0).key_found());
assert_eq!(x, a[p_lookfor(x, a, 0).unwrap()]);
assert!(p_remove(x, a, 0));
println!(" after remove of {} a is {:?}", x, a);
assert_eq!(a, &v[..]);
}
#[test]
fn test_remove() {
let mut a = [0, 0, 2];
a[p_insert(5, &mut a, 0)] = 5;
assert_eq!(&[5, 0, 2], &a);
p_remove(2, &mut a, 0);
println!("after removal {:?}", a);
assert!(p_lookfor(5, &a, 0).key_found());
assert_eq!(&[0, 0, 5], &a);
test_insert_remove(7, &mut [0, 0, 0, 0]);
test_insert_remove(2, &mut [0, 1, 5, 0]);
test_insert_remove(5, &mut [0, 0, 2]);
}
#[test]
fn p_remove_from_small_full() {
let mut a = [258, 260];
assert!(!p_remove(2, &mut a, 0));
assert_eq!(a[0], 258);
assert_eq!(a[1], 260);
assert!(p_remove(258, &mut a, 0));
assert_eq!(a[0], 260);
assert_eq!(a[1], 0);
}