hi_sparse_bitset/
bitset.rs

1mod block;
2mod level;
3mod serialization;
4#[cfg(feature="serde")]
5mod serde;
6
7use std::mem::{ManuallyDrop, MaybeUninit};
8use std::ptr::NonNull;
9use crate::config::Config;
10use block::Block;
11use crate::bitset::level::{IBlock, Level};
12use crate::{level_indices, BitBlock, BitSetBase, BitSetInterface, DataBlock};
13use crate::bitset_interface::{LevelMasks, LevelMasksIterExt};
14use crate::internals::{impl_bitset, Primitive};
15
16type Level0Block<Conf> = Block<
17    <Conf as Config>::Level0BitBlock, 
18    <Conf as Config>::Level0BlockIndices
19>;
20type Level1Block<Conf> = Block<
21    <Conf as Config>::Level1BitBlock,
22    <Conf as Config>::Level1BlockIndices
23>;
24type LevelDataBlock<Conf> = Block<
25    <Conf as Config>::DataBitBlock, [usize;0]
26>;
27
28/// Hierarchical sparse bitset.
29///
30/// Tri-level hierarchy. Highest uint it can hold
31/// is [Level0BitBlock]::size() * [Level1BitBlock]::size() * [DataBitBlock]::size().
32///
33/// Only the last level contains blocks of actual data. Empty(skipped) data blocks
34/// are not allocated.
35///
36/// Structure optimized for intersection speed. 
37/// _(Other inter-bitset operations are in fact fast too - but intersection 
38/// has the lowest algorithmic complexity.)_
39/// Insert/remove/contains is fast O(1) too.
40/// 
41/// [Level0BitBlock]: crate::config::Config::Level0BitBlock
42/// [Level1BitBlock]: crate::config::Config::Level1BitBlock
43/// [DataBitBlock]: crate::config::Config::DataBitBlock
44pub struct BitSet<Conf: Config> {
45    level0: Level0Block<Conf>,
46    level1: Level<Level1Block<Conf>>,
47    data  : Level<LevelDataBlock<Conf> >,
48}
49
50impl<Conf: Config> Clone for BitSet<Conf> {
51    fn clone(&self) -> Self {
52        Self{
53            level0: self.level0.clone(),
54            level1: self.level1.clone(),
55            data  : self.data.clone(),
56        }
57    }
58}
59
60impl<Conf: Config> Default for BitSet<Conf> {
61    fn default() -> Self {
62        Self{
63            level0: Default::default(),
64            level1: Default::default(),
65            data  : Default::default(),
66        }
67    }    
68}
69
70impl<Conf: Config> FromIterator<usize> for BitSet<Conf> {
71    fn from_iter<T: IntoIterator<Item=usize>>(iter: T) -> Self {
72        let mut this = Self::default();
73        this.extend(iter);
74        this
75    }    
76}
77
78impl<Conf: Config> FromIterator<DataBlock<Conf::DataBitBlock>> for BitSet<Conf> {
79    /// It is allowed for blocks with the same range to repeat in iterator.
80    /// Like `([1,42], [15,27,61])`. Their data will be merged.
81    fn from_iter<T: IntoIterator<Item=DataBlock<Conf::DataBitBlock>>>(iter: T) -> Self {
82        let mut this = Self::default();
83        this.extend(iter);
84        this
85    }    
86}
87
88impl<Conf: Config> Extend<usize> for BitSet<Conf> {
89    fn extend<T: IntoIterator<Item=usize>>(&mut self, iter: T) {
90        for i in iter {
91            self.insert(i);
92        }
93    }
94}
95
96impl<Conf: Config> Extend<DataBlock<Conf::DataBitBlock>> for BitSet<Conf> {
97    /// It is allowed for blocks with the same range to repeat in iterator.
98    /// Like `([1,42], [15,27,61])`. Their data will be merged.
99    fn extend<T: IntoIterator<Item=DataBlock<Conf::DataBitBlock>>>(&mut self, iter: T) {
100        for b in iter {
101            self.insert_block(b);
102        }
103    }
104}
105
106impl<Conf: Config, const N: usize> From<[usize; N]> for BitSet<Conf> {
107    #[inline]
108    fn from(value: [usize; N]) -> Self {
109        Self::from_iter(value.into_iter())
110    }    
111}
112
113impl<Conf, B> From<B> for BitSet<Conf>
114where
115    Conf: Config, 
116    B: BitSetInterface<Conf = Conf>
117{
118    /// Materialize any [BitSetInterface]. 
119    #[inline]
120    fn from(bitset: B) -> Self {
121        /*if B::TRUSTED_HIERARCHY{
122            todo!("optimized special case with hierarchies + prealocated space")
123        }*/
124        
125        // number of blocks in each level unknown.
126        // insert block by block.
127        // We only know that blocks come in order.
128        let mut this = Self::default();
129        let mut global_level1_index = usize::MAX;
130        let mut level1_block_ptr: Option<NonNull<Level1Block<Conf>>> = None;
131        bitset.block_iter().for_each(|block|{
132            // block can be empty
133            if block.is_empty(){
134                return;
135            }
136            
137            // TODO: block_iter could just return these
138            let (inner_level0_index, inner_level1_index, _) = Self::level_indices(block.start_index);
139            
140            // block.start_index / 2^Conf::DataBitBlock::SIZE_POT_EXPONENT
141            let current_level1_block_index = block.start_index >> Conf::DataBitBlock::SIZE_POT_EXPONENT;
142            if current_level1_block_index != global_level1_index {
143                global_level1_index = current_level1_block_index;
144                
145                // 1. Level0
146                let level1_block_index = unsafe{
147                    this.level0.get_or_insert(inner_level0_index, ||{
148                        let block_index = this.level1.insert_block();
149                        Primitive::from_usize(block_index)
150                    })
151                }.as_usize();
152
153                // 2. Level1
154                level1_block_ptr = Some(NonNull::from(
155                    unsafe{
156                        this.level1.blocks_mut().get_unchecked_mut(level1_block_index)
157                    }
158                ));
159            }
160
161            // 3. Data Level
162            unsafe{
163                // Make data block.
164                let mut data_block = LevelDataBlock::<Conf>::default();
165                *data_block.mask_mut() = block.bit_block;
166                
167                // Push block to data level. 
168                let data_block_index = this.data.push_block(data_block);
169
170                // Register block's index in level1.
171                level1_block_ptr.unwrap_unchecked().as_mut()
172                    .insert_unchecked(inner_level1_index, Primitive::from_usize(data_block_index));
173            }
174        });
175        this
176    }    
177}
178
179impl<Conf: Config> BitSet<Conf> {
180    #[inline]
181    fn level_indices(index: usize) -> (usize/*level0*/, usize/*level1*/, usize/*data*/){
182        level_indices::<Conf>(index)
183    }
184    
185    /// Max usize, [BitSet] with this `Config` can hold.
186    /// 
187    /// [BitSet]: crate::BitSet
188    #[inline]
189    pub const fn max_capacity() -> usize {
190        Conf::MAX_CAPACITY
191    }      
192    
193    #[inline]
194    fn is_in_range(index: usize) -> bool{
195        index < Self::max_capacity()
196    }
197
198    /// # Safety
199    /// 
200    /// indices are not checked
201    #[inline]
202    unsafe fn get_block_indices(&self, level0_index: usize, level1_index: usize)
203        -> (usize/*level1_block_index*/, usize/*data_block_index*/)
204    {
205        let level1_block_index = self.level0.get_or_zero(level0_index).as_usize();
206        let level1_block = self.level1.blocks().get_unchecked(level1_block_index);
207        let data_block_index = level1_block.get_or_zero(level1_index).as_usize();
208        (level1_block_index, data_block_index)
209    }
210    
211    /// # Safety
212    /// 
213    /// indices are not checked
214    #[inline]
215    unsafe fn get_or_insert_data_block(&mut self, level0_index: usize, level1_index: usize) 
216        -> &mut LevelDataBlock<Conf>
217    {
218        // 1. Level0
219        let level1_block_index = 
220            self.level0.get_or_insert(level0_index, ||{
221                let block_index = self.level1.insert_block();
222                Primitive::from_usize(block_index)
223            }).as_usize();
224
225        // 2. Level1
226        let data_block_index = { 
227            let level1_block = self.level1.blocks_mut().get_unchecked_mut(level1_block_index);
228            level1_block.get_or_insert(level1_index, ||{
229                let block_index = self.data.insert_block();
230                Primitive::from_usize(block_index)
231            }).as_usize()
232        };
233
234        // 3. Data block
235        self.data.blocks_mut().get_unchecked_mut(data_block_index)
236    }
237    
238    #[inline]
239    pub fn new() -> Self {
240        Default::default()
241    }
242    
243    /// # Panics
244    ///
245    /// Panics, if `index` is out of index range.
246    pub fn insert(&mut self, index: usize){
247        assert!(Self::is_in_range(index), "{index} is out of index range!");
248
249        // That's indices to next level
250        let (level0_index, level1_index, data_index) = Self::level_indices(index);
251
252        unsafe{
253            let data_block = self.get_or_insert_data_block(level0_index, level1_index);
254            data_block.mask_mut().set_bit_unchecked::<true>(data_index);
255        }
256    }
257
258    /// # Panics
259    ///
260    /// Panics, if `block` is out of index range.
261    pub fn insert_block(&mut self, block: DataBlock<Conf::DataBitBlock>) {
262        if block.is_empty() {
263            return;
264        }
265        
266        assert!(
267            Self::is_in_range(block.start_index + Conf::DataBitBlock::size()), 
268            "{:?} is out of index range!", block
269        );
270
271        // That's indices to next level
272        let (level0_index, level1_index, _) = Self::level_indices(block.start_index);
273
274        unsafe{
275            let data_block = self.get_or_insert_data_block(level0_index, level1_index);
276            *data_block.mask_mut() |= block.bit_block; 
277        }
278    }    
279    
280    /// Returns false if `index` is not in bitset.
281    /// 
282    /// # Panics
283    ///
284    /// Panics, if `index` is out of index range.
285    pub fn remove(&mut self, index: usize) -> bool {
286        assert!(Self::is_in_range(index), "{index} is out of index range!");
287
288        // 1. Resolve indices
289        let (level0_index, level1_index, data_index) = Self::level_indices(index);
290        let (level1_block_index, data_block_index) = unsafe {
291            self.get_block_indices(level0_index, level1_index)
292        };
293        if data_block_index == 0 {
294            return false;
295        }
296
297        unsafe {
298            // 2. Get Data block and set bit
299            let data_block = self.data.blocks_mut().get_unchecked_mut(data_block_index);
300            let existed = data_block.mask_mut().set_bit_unchecked::<false>(data_index);
301            
302            // 3. Remove free blocks
303            if data_block.is_empty(){
304                // remove data block
305                self.data.remove_empty_block_unchecked(data_block_index);
306
307                // remove pointer from level1
308                let level1_block = self.level1.blocks_mut().get_unchecked_mut(level1_block_index);
309                level1_block.remove_unchecked(level1_index);
310
311                if level1_block.is_empty(){
312                    // remove level1 block
313                    self.level1.remove_empty_block_unchecked(level1_block_index);
314
315                    // remove pointer from level0
316                    self.level0.remove_unchecked(level0_index);
317                }
318            }
319            existed
320        }
321    }
322}
323
324
325impl<Conf: Config> BitSetBase for BitSet<Conf> {
326    type Conf = Conf;
327    const TRUSTED_HIERARCHY: bool = true;
328}
329
330impl<Conf: Config> LevelMasks for BitSet<Conf> {
331    #[inline]
332    fn level0_mask(&self) -> Conf::Level0BitBlock {
333        *self.level0.mask()
334    }
335
336    #[inline]
337    unsafe fn level1_mask(&self, level0_index: usize) -> Conf::Level1BitBlock {
338        let level1_block_index = self.level0.get_or_zero(level0_index).as_usize();
339        let level1_block = self.level1.blocks().get_unchecked(level1_block_index);
340        *level1_block.mask()
341    }
342
343    #[inline]
344    unsafe fn data_mask(&self, level0_index: usize, level1_index: usize) -> Conf::DataBitBlock {
345        let (_, data_block_index) = self.get_block_indices(level0_index, level1_index);
346        let data_block = self.data.blocks().get_unchecked(data_block_index);
347        *data_block.mask()
348    }    
349}
350
351impl<Conf: Config> LevelMasksIterExt for BitSet<Conf> {
352    /// Points to elements in heap. Guaranteed to be stable.
353    /// This is just plain pointers with null in default:
354    /// `(*const LevelDataBlock<Conf>, *const Level1Block<Conf>)`
355    type Level1BlockData = (
356        Option<NonNull<LevelDataBlock<Conf>>>,  /* data array pointer */
357        Option<NonNull<Level1Block<Conf>>>      /* block pointer */
358    );
359
360    type IterState = ();
361    fn make_iter_state(&self) -> Self::IterState { () }
362    unsafe fn drop_iter_state(&self, _: &mut ManuallyDrop<Self::IterState>) {}
363
364    #[inline]
365    unsafe fn init_level1_block_data(
366        &self,
367        _: &mut Self::IterState,
368        level1_block_data: &mut MaybeUninit<Self::Level1BlockData>,
369        level0_index: usize
370    ) -> (<Self::Conf as Config>::Level1BitBlock, bool){
371        let level1_block_index = self.level0.get_or_zero(level0_index);
372        let level1_block = self.level1.blocks().get_unchecked(level1_block_index.as_usize());
373        level1_block_data.write(
374            (
375                Some(NonNull::new_unchecked(self.data.blocks().as_ptr() as *mut _)),
376                Some(NonNull::from(level1_block))
377            )
378        );
379        (*level1_block.mask(), !level1_block_index.is_zero())
380    }
381
382    #[inline]
383    unsafe fn data_mask_from_block_data(
384        level1_blocks: &Self::Level1BlockData, level1_index: usize
385    ) -> Conf::DataBitBlock {
386        let array_ptr = level1_blocks.0.unwrap_unchecked().as_ptr().cast_const();
387        let level1_block = level1_blocks.1.unwrap_unchecked().as_ref();
388
389        let data_block_index = level1_block.get_or_zero(level1_index);
390        let data_block = &*array_ptr.add(data_block_index.as_usize());
391        *data_block.mask()
392    }    
393}
394
395impl_bitset!(impl<Conf> for ref BitSet<Conf> where Conf: Config);