1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
use core::{
cmp, fmt,
ops::{BitAnd, BitAndAssign, BitOr, Not, Shl},
slice::Iter,
};
pub use internal_unsafe::FixedBitSet;
use internal_unsafe::Len;
use spacetimedb_sats::memory_usage::MemoryUsage;
/// A type used to represent blocks in a bit set.
/// A smaller type, compared to usize,
/// means taking less advantage of native operations.
/// A larger type means we might over-allocate more.
pub trait BitBlock:
Copy
+ Eq
+ Not<Output = Self>
+ BitAnd<Self, Output = Self>
+ BitAndAssign
+ BitOr<Self, Output = Self>
+ Shl<usize, Output = Self>
{
/// The number of bits that [`Self`] can represent.
const BITS: u32;
/// The first bit is set.
const ONE: Self;
/// No bits are set.
const ZERO: Self;
fn wrapping_sub(self, rhs: Self) -> Self;
fn trailing_zeros(self) -> u32;
}
type DefaultBitBlock = u64;
impl BitBlock for DefaultBitBlock {
const BITS: u32 = Self::BITS;
const ONE: Self = 1;
const ZERO: Self = 0;
#[inline]
fn wrapping_sub(self, rhs: Self) -> Self {
self.wrapping_sub(rhs)
}
#[inline]
fn trailing_zeros(self) -> u32 {
self.trailing_zeros()
}
}
/// The internals of `FixedBitSet`.
/// Separated from the higher level APIs to contain the safety boundary.
mod internal_unsafe {
use spacetimedb_lib::{de::Deserialize, ser::Serialize};
use spacetimedb_sats::{impl_deserialize, impl_serialize};
use super::{BitBlock, DefaultBitBlock};
use crate::{static_assert_align, static_assert_size};
use core::{
mem,
ptr::NonNull,
slice::{from_raw_parts, from_raw_parts_mut},
};
/// The type used to represent the number of bits the set can hold.
///
/// Currently `u16` to keep `mem::size_of::<FixedBitSet>()` small.
pub(super) type Len = u16;
/// A bit set that, once created, has a fixed size over time.
///
/// The set can store at most `u16::MAX` number of bits.
#[repr(C, packed)]
pub struct FixedBitSet<B = DefaultBitBlock> {
/// The size of the heap allocation in number of elements.
len: Len,
/// A pointer to a heap allocation of `[B]` of `self.len`.
ptr: NonNull<B>,
}
static_assert_align!(FixedBitSet, 1);
static_assert_size!(FixedBitSet, mem::size_of::<usize>() + mem::size_of::<Len>());
// SAFETY: `FixedBitSet` owns its data.
unsafe impl<B> Send for FixedBitSet<B> {}
// SAFETY: `FixedBitSet` owns its data.
unsafe impl<B> Sync for FixedBitSet<B> {}
impl<B> Drop for FixedBitSet<B> {
fn drop(&mut self) {
let blocks = self.storage_mut();
// SAFETY: We own the memory region pointed to by `blocks`,
// and as we have `&'0 mut self`, we also have exclusive access to it.
// So, and since we are in `drop`,
// we can deallocate the memory as we are the last referent to it.
// Moreover, the memory was allocated in `Self::new(..)` using `vec![..]`,
// which will allocate using `Global`, so we can convert it back to a `Box`.
let _ = unsafe { Box::from_raw(blocks) };
}
}
// We need to be able to serialize and deserialize `FixedBitSet` because they appear in the `PageHeader`.
impl_serialize!([B: BitBlock + Serialize] FixedBitSet<B>, (self, ser) => self.storage().serialize(ser));
impl_deserialize!([B: BitBlock + Deserialize<'de>] FixedBitSet<B>, de => {
let storage = Box::<[B]>::deserialize(de)?;
Ok(Self::from_boxed_slice(storage))
});
impl<B: BitBlock> FixedBitSet<B> {
pub(super) fn from_boxed_slice(storage: Box<[B]>) -> Self {
// SAFETY: required for the soundness of `Drop` as
// `dealloc` must receive the same layout as it was `alloc`ated with.
assert!(storage.len() <= Len::MAX as usize);
let len = storage.len() as Len;
let ptr = NonNull::from(Box::leak(storage)).cast();
Self { ptr, len }
}
}
impl<B> FixedBitSet<B> {
/// Returns the capacity of the bitset.
#[inline]
pub(super) const fn blocks(&self) -> usize {
self.len as usize
}
/// Returns the backing `[B]` slice for shared access.
pub(crate) const fn storage(&self) -> &[B] {
let ptr = self.ptr.as_ptr();
let len = self.blocks();
// SAFETY:
// - `self.ptr` is a `NonNull` so `ptr` cannot be null.
// - `self.ptr` is properly aligned for `BitBlock`s.
// - `self.ptr` is valid for reads as we have `&self` and we own the memory
// which we know is `blocks` elements long.
// - As we have `&'0 self`, elsewhere cannot mutate the memory during `'0`
// except through an `UnsafeCell`.
unsafe { from_raw_parts(ptr, len) }
}
/// Returns the backing `[B]` slice for mutation.
pub(super) fn storage_mut(&mut self) -> &mut [B] {
let ptr = self.ptr.as_ptr();
let len = self.blocks();
// SAFETY:
// - `self.ptr` is a `NonNull` so `ptr` cannot be null.
// - `self.ptr` is properly aligned for `BitBlock`s.
// - `self.ptr` is valid for reads and writes as we have `&mut self` and we own the memory
// which we know is `blocks` elements long.
// - As we have `&'0 mut self`, we have exclusive access for `'0`
// so the memory cannot be accessed elsewhere during `'0`.
unsafe { from_raw_parts_mut(ptr, len) }
}
}
}
impl<B: BitBlock> cmp::Eq for FixedBitSet<B> {}
impl<B: BitBlock> cmp::PartialEq for FixedBitSet<B> {
fn eq(&self, other: &Self) -> bool {
self.storage() == other.storage()
}
}
/// Computes how many blocks are needed to store that many bits.
fn blocks_for_bits<B: BitBlock>(bits: usize) -> usize {
// Must round e.g., 31 / 32 to 1 and 32 / 32 to 1 as well.
bits.div_ceil(B::BITS as usize)
}
impl<B: BitBlock> FixedBitSet<B> {
/// Allocates a new bit set capable of holding `bits` number of bits.
pub fn new(bits: usize) -> Self {
Self::new_for_blocks(blocks_for_bits::<B>(bits))
}
/// Allocates a new bit set that will have a capacity of `nblocks` blocks.
#[inline]
fn new_for_blocks(nblocks: usize) -> Self {
// Allocate the blocks and extract the pointer to the heap region.
let blocks: Box<[B]> = vec![B::ZERO; nblocks].into_boxed_slice();
Self::from_boxed_slice(blocks)
}
/// Converts `idx` to its block index and the index within the block.
const fn idx_to_pos(idx: usize) -> (usize, usize) {
let bits = B::BITS as usize;
(idx / bits, idx % bits)
}
/// Returns whether `idx` is set or not.
pub fn get(&self, idx: usize) -> bool {
let (block_idx, pos_in_block) = Self::idx_to_pos(idx);
let block = self.storage()[block_idx];
(block & (B::ONE << pos_in_block)) != B::ZERO
}
/// Sets bit at position `idx` to `val`.
pub fn set(&mut self, idx: usize, val: bool) {
let (block_idx, pos_in_block) = Self::idx_to_pos(idx);
let block = &mut self.storage_mut()[block_idx];
// Update the block.
let flag = B::ONE << pos_in_block;
*block = if val { *block | flag } else { *block & !flag };
}
/// Clears every bit in the vec.
pub fn clear(&mut self) {
self.storage_mut().fill(B::ZERO);
}
/// Resets the bit set so that it's capable of holding `bits` number of bits.
///
/// Every bit in the set will be zero after this.
pub fn reset_for(&mut self, bits: usize) {
// Compute the number of blocks needed.
let nblocks = blocks_for_bits::<B>(bits);
// Either clear the existing set, reusing it, or make a new one.
if nblocks == self.blocks() {
self.clear();
} else {
*self = Self::new_for_blocks(nblocks)
}
}
/// Returns the capacity of the bitset in bits.
#[inline]
pub(crate) const fn bits(&self) -> usize {
self.blocks() * B::BITS as usize
}
/// Returns all the set indices.
pub fn iter_set(&self) -> IterSet<'_, B> {
let mut inner = self.storage().iter();
// Fetch the first block; if it isn't there, use an all-zero one.
// This will cause the iterator to terminate immediately.
let curr = inner.next().copied().unwrap_or(B::ZERO);
IterSet {
inner,
curr,
block_idx: 0,
}
}
/// Returns all the set indices from `start_idx` inclusive.
pub fn iter_set_from(&self, start_idx: usize) -> IterSet<'_, B> {
// Translate the index to its block and position within it.
let (block_idx, pos_in_block) = Self::idx_to_pos(start_idx);
// We want our iteration to start from the block that includes `start_idx`.
let mut inner = self.storage()[block_idx..].iter();
// Fetch the first block; if it isn't there, use an all-zero one.
// This will cause the iterator to terminate immediately.
let curr = inner.next().copied().unwrap_or(B::ZERO);
// Our `start_idx` might be in the middle of the `curr` block.
// To resolve this, we must zero out any preceding bits.
// So e.g., for `B = u8`,
// we must transform `0000_1011` to `0000_1000` for `start_idx = 3`.
let zero_preceding_mask = B::ZERO.wrapping_sub(B::ONE << pos_in_block);
let curr = curr & zero_preceding_mask;
IterSet {
inner,
curr,
block_idx: block_idx as Len,
}
}
}
impl<B> MemoryUsage for FixedBitSet<B> {
fn heap_usage(&self) -> usize {
std::mem::size_of_val(self.storage())
}
}
impl<B: BitBlock> fmt::Debug for FixedBitSet<B> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_set().entries(self.iter_set()).finish()
}
}
/// An iterator that yields the set indices of a [`FixedBitSet`].
pub struct IterSet<'a, B = DefaultBitBlock> {
/// The block iterator.
inner: Iter<'a, B>,
/// The current block being processed, taken from `self.inner`.
curr: B,
/// What the index of `self.curr` is.
block_idx: Len,
}
impl<B: BitBlock> Iterator for IterSet<'_, B> {
type Item = usize;
fn next(&mut self) -> Option<Self::Item> {
loop {
let tz = self.curr.trailing_zeros();
if tz < B::BITS {
// Some bit was set; so yield the index of that
// and zero the bit out so we don't yield it again.
self.curr &= self.curr.wrapping_sub(B::ONE);
let idx = self.block_idx as u32 * B::BITS + tz;
return Some(idx as usize);
} else {
// No bit is set; advance to the next block, or quit if none left.
self.curr = *self.inner.next()?;
self.block_idx += 1;
}
}
}
}
#[cfg(test)]
pub(crate) mod test {
use super::*;
use proptest::bits::bitset::between;
use proptest::prelude::*;
use spacetimedb_data_structures::map::HashSet;
#[test]
#[should_panic]
fn zero_sized_is_ok() {
let mut set = FixedBitSet::<DefaultBitBlock>::new(0);
set.clear();
set.iter_set_from(0).count();
set.iter_set().count();
set.get(0);
}
const MAX_NBITS: usize = 1000;
proptest! {
#![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]
#[test]
fn after_new_there_are_no_bits_set(nbits in 0..MAX_NBITS) {
let set = FixedBitSet::<DefaultBitBlock>::new(nbits);
for idx in 0..nbits {
prop_assert!(!set.get(idx));
}
}
#[test]
fn after_clear_there_are_no_bits_set(choices in between(0, MAX_NBITS)) {
let nbits = choices.get_ref().len();
let mut set = FixedBitSet::<DefaultBitBlock>::new(nbits);
// Set all the bits chosen.
for idx in &choices {
prop_assert!(!set.get(idx));
set.set(idx, true);
prop_assert!(set.get(idx));
}
// Clear!
set.clear();
// After clearing, all bits should be unset.
for idx in 0..nbits {
prop_assert!(!set.get(idx));
}
}
#[test]
fn get_set_consistency(choices in between(0, MAX_NBITS)) {
let nbits = choices.get_ref().len();
let mut set = FixedBitSet::<DefaultBitBlock>::new(nbits);
// Set all the bits chosen.
for idx in &choices {
prop_assert!(!set.get(idx));
// After setting, it's true.
set.set(idx, true);
prop_assert!(set.get(idx));
// And this is idempotent.
set.set(idx, true);
prop_assert!(set.get(idx));
}
// Build the "complement" of `choices`.
let choices: HashSet<_> = choices.into_iter().collect();
let universe: HashSet<_> = (0..nbits).collect();
for idx in universe.difference(&choices) {
prop_assert!(!set.get(*idx));
}
// Unset all the bits chosen.
for idx in &choices {
// After unsetting, it's false.
set.set(*idx, false);
prop_assert!(!set.get(*idx));
// And this is idempotent.
set.set(*idx, false);
prop_assert!(!set.get(*idx));
}
}
#[test]
fn iter_set_preserves_order_of_original_choices(choices in between(0, MAX_NBITS)) {
let nbits = choices.get_ref().len();
// Set all the bits chosen.
let mut set = FixedBitSet::<DefaultBitBlock>::new(nbits);
for idx in &choices {
set.set(idx, true);
}
// `iter_set` produces the same list `choices`.
let collected = set.iter_set().collect::<Vec<_>>();
let original = choices.iter().collect::<Vec<_>>();
prop_assert_eq!(&original, &collected);
if let [_, second, ..] = &*original {
// Starting from the second yields the same list as `choices[1..]`.
let collected = set.iter_set_from(*second).collect::<Vec<_>>();
prop_assert_eq!(&original[1..], &collected);
}
// `iter_set_from` and `iter_set` produce the same list.
prop_assert_eq!(collected, set.iter_set_from(0).collect::<Vec<_>>());
}
#[test]
fn serde_round_trip(choices in between(0, MAX_NBITS)) {
let nbits = choices.get_ref().len();
// Set all the bits chosen.
let mut set = FixedBitSet::<DefaultBitBlock>::new(nbits);
for idx in &choices {
set.set(idx, true);
}
let ser = spacetimedb_lib::bsatn::to_vec(&set)?;
let de = spacetimedb_lib::bsatn::from_slice::<FixedBitSet<DefaultBitBlock>>(&ser)?;
assert!(set == de);
}
}
}