Skip to main content

nodedb_columnar/
delete_bitmap.rs

1//! Per-segment delete bitmap for columnar UPDATE/DELETE.
2//!
3//! Uses roaring bitmaps for space-efficient tracking of deleted row indices.
4//! Each segment has at most one associated DeleteBitmap, stored alongside
5//! the segment as `{segment_id}.del`.
6//!
7//! The bitmap is serialized to bytes for persistence and deserialized on
8//! segment open. Scans consult the bitmap to skip deleted rows.
9
10use roaring::RoaringBitmap;
11
12use crate::error::ColumnarError;
13
14/// Per-segment bitmap of deleted row indices.
15///
16/// Wraps a roaring bitmap for O(1) membership checks and efficient
17/// serialization. Row indices are 0-based within the segment.
18#[derive(Debug, Clone)]
19pub struct DeleteBitmap {
20    inner: RoaringBitmap,
21}
22
23impl DeleteBitmap {
24    /// Create an empty delete bitmap.
25    pub fn new() -> Self {
26        Self {
27            inner: RoaringBitmap::new(),
28        }
29    }
30
31    /// Mark a row as deleted. Returns true if the row was newly deleted.
32    pub fn mark_deleted(&mut self, row_idx: u32) -> bool {
33        self.inner.insert(row_idx)
34    }
35
36    /// Mark multiple rows as deleted.
37    pub fn mark_deleted_batch(&mut self, row_indices: &[u32]) {
38        for &idx in row_indices {
39            self.inner.insert(idx);
40        }
41    }
42
43    /// Check whether a row is deleted.
44    pub fn is_deleted(&self, row_idx: u32) -> bool {
45        self.inner.contains(row_idx)
46    }
47
48    /// Number of deleted rows.
49    pub fn deleted_count(&self) -> u64 {
50        self.inner.len()
51    }
52
53    /// Whether the bitmap is empty (no deletions).
54    pub fn is_empty(&self) -> bool {
55        self.inner.is_empty()
56    }
57
58    /// Delete ratio: deleted_count / total_rows. Used to trigger compaction.
59    pub fn delete_ratio(&self, total_rows: u64) -> f64 {
60        if total_rows == 0 {
61            return 0.0;
62        }
63        self.inner.len() as f64 / total_rows as f64
64    }
65
66    /// Whether this segment should be compacted (delete ratio > threshold).
67    pub fn should_compact(&self, total_rows: u64, threshold: f64) -> bool {
68        self.delete_ratio(total_rows) > threshold
69    }
70
71    /// Check whether an entire block is fully deleted.
72    ///
73    /// `block_start` is the first row index of the block, `block_len` is
74    /// the number of rows. Returns true if every row in the range is deleted.
75    pub fn is_block_fully_deleted(&self, block_start: u32, block_len: u32) -> bool {
76        if block_len == 0 {
77            return true;
78        }
79        // Count deleted rows in the range [block_start, block_start + block_len).
80        let count = self
81            .inner
82            .range(block_start..block_start + block_len)
83            .count() as u32;
84        count == block_len
85    }
86
87    /// Apply the delete bitmap to a validity vector: set deleted rows to false.
88    ///
89    /// `global_offset` is the row index of the first element in `valid`
90    /// (used when processing a block that starts at a non-zero row).
91    pub fn apply_to_validity(&self, valid: &mut [bool], global_offset: u32) {
92        for (i, v) in valid.iter_mut().enumerate() {
93            if self.inner.contains(global_offset + i as u32) {
94                *v = false;
95            }
96        }
97    }
98
99    /// Serialize the bitmap to bytes for persistence.
100    pub fn to_bytes(&self) -> Result<Vec<u8>, ColumnarError> {
101        let mut buf = Vec::with_capacity(self.inner.serialized_size());
102        self.inner
103            .serialize_into(&mut buf)
104            .map_err(|e| ColumnarError::Serialization(format!("delete bitmap: {e}")))?;
105        Ok(buf)
106    }
107
108    /// Deserialize a bitmap from bytes.
109    pub fn from_bytes(data: &[u8]) -> Result<Self, ColumnarError> {
110        let inner = RoaringBitmap::deserialize_from(data)
111            .map_err(|e| ColumnarError::Serialization(format!("delete bitmap: {e}")))?;
112        Ok(Self { inner })
113    }
114
115    /// Iterate over all deleted row indices.
116    pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
117        self.inner.iter()
118    }
119
120    /// Merge another bitmap into this one (union). Used during compaction
121    /// to combine delete bitmaps from multiple source segments.
122    pub fn merge(&mut self, other: &DeleteBitmap) {
123        self.inner |= &other.inner;
124    }
125}
126
127impl Default for DeleteBitmap {
128    fn default() -> Self {
129        Self::new()
130    }
131}
132
133#[cfg(test)]
134mod tests {
135    use super::*;
136
137    #[test]
138    fn mark_and_check() {
139        let mut bm = DeleteBitmap::new();
140        assert!(!bm.is_deleted(5));
141        assert!(bm.mark_deleted(5));
142        assert!(bm.is_deleted(5));
143        assert!(!bm.mark_deleted(5)); // Already deleted.
144        assert_eq!(bm.deleted_count(), 1);
145    }
146
147    #[test]
148    fn batch_delete() {
149        let mut bm = DeleteBitmap::new();
150        bm.mark_deleted_batch(&[1, 3, 5, 7, 9]);
151        assert_eq!(bm.deleted_count(), 5);
152        assert!(bm.is_deleted(3));
153        assert!(!bm.is_deleted(4));
154    }
155
156    #[test]
157    fn serialization_roundtrip() {
158        let mut bm = DeleteBitmap::new();
159        bm.mark_deleted_batch(&[0, 10, 100, 1000, 10000]);
160
161        let bytes = bm.to_bytes().unwrap();
162        let restored = DeleteBitmap::from_bytes(&bytes).expect("deserialize");
163
164        assert_eq!(restored.deleted_count(), 5);
165        assert!(restored.is_deleted(0));
166        assert!(restored.is_deleted(10));
167        assert!(restored.is_deleted(10000));
168        assert!(!restored.is_deleted(50));
169    }
170
171    #[test]
172    fn delete_ratio_and_compaction() {
173        let mut bm = DeleteBitmap::new();
174        for i in 0..30 {
175            bm.mark_deleted(i);
176        }
177        // 30 deleted out of 100 total = 0.3
178        assert!((bm.delete_ratio(100) - 0.3).abs() < 0.001);
179        // Default threshold 0.2 → should compact.
180        assert!(bm.should_compact(100, 0.2));
181        // Threshold 0.5 → should not compact.
182        assert!(!bm.should_compact(100, 0.5));
183    }
184
185    #[test]
186    fn block_fully_deleted() {
187        let mut bm = DeleteBitmap::new();
188        // Delete rows 10..20.
189        for i in 10..20 {
190            bm.mark_deleted(i);
191        }
192        assert!(bm.is_block_fully_deleted(10, 10));
193        assert!(!bm.is_block_fully_deleted(10, 11)); // Row 20 is not deleted.
194        assert!(!bm.is_block_fully_deleted(5, 10)); // Rows 5-9 are not deleted.
195    }
196
197    #[test]
198    fn apply_to_validity() {
199        let mut bm = DeleteBitmap::new();
200        bm.mark_deleted_batch(&[2, 4]);
201
202        let mut valid = vec![true, true, true, true, true];
203        bm.apply_to_validity(&mut valid, 0);
204
205        assert!(valid[0]);
206        assert!(valid[1]);
207        assert!(!valid[2]); // Deleted.
208        assert!(valid[3]);
209        assert!(!valid[4]); // Deleted.
210    }
211
212    #[test]
213    fn apply_to_validity_with_offset() {
214        let mut bm = DeleteBitmap::new();
215        bm.mark_deleted(1025); // Row in second block.
216
217        let mut valid = vec![true; 1024];
218        bm.apply_to_validity(&mut valid, 1024); // Block starts at row 1024.
219
220        assert!(valid[0]); // Global row 1024: not deleted.
221        assert!(!valid[1]); // Global row 1025: deleted.
222        assert!(valid[2]); // Global row 1026: not deleted.
223    }
224
225    #[test]
226    fn merge_bitmaps() {
227        let mut bm1 = DeleteBitmap::new();
228        bm1.mark_deleted_batch(&[1, 2, 3]);
229
230        let mut bm2 = DeleteBitmap::new();
231        bm2.mark_deleted_batch(&[3, 4, 5]);
232
233        bm1.merge(&bm2);
234        assert_eq!(bm1.deleted_count(), 5); // Union: {1,2,3,4,5}.
235    }
236
237    #[test]
238    fn empty_bitmap() {
239        let bm = DeleteBitmap::new();
240        assert!(bm.is_empty());
241        assert_eq!(bm.deleted_count(), 0);
242        assert!(!bm.is_deleted(0));
243
244        // Serialization roundtrip of empty bitmap.
245        let bytes = bm.to_bytes().unwrap();
246        let restored = DeleteBitmap::from_bytes(&bytes).expect("deserialize");
247        assert!(restored.is_empty());
248    }
249}