Skip to main content

nodedb_columnar/
delete_bitmap.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Per-segment delete bitmap for columnar UPDATE/DELETE.
4//!
5//! Uses roaring bitmaps for space-efficient tracking of deleted row indices.
6//! Each segment has at most one associated DeleteBitmap, stored alongside
7//! the segment as `{segment_id}.del`.
8//!
9//! The bitmap is serialized to bytes for persistence and deserialized on
10//! segment open. Scans consult the bitmap to skip deleted rows.
11
12use roaring::RoaringBitmap;
13
14use crate::error::ColumnarError;
15
16/// Per-segment bitmap of deleted row indices.
17///
18/// Wraps a roaring bitmap for O(1) membership checks and efficient
19/// serialization. Row indices are 0-based within the segment.
20#[derive(Debug, Clone)]
21pub struct DeleteBitmap {
22    inner: RoaringBitmap,
23}
24
25impl DeleteBitmap {
26    /// Create an empty delete bitmap.
27    pub fn new() -> Self {
28        Self {
29            inner: RoaringBitmap::new(),
30        }
31    }
32
33    /// Mark a row as deleted. Returns true if the row was newly deleted.
34    pub fn mark_deleted(&mut self, row_idx: u32) -> bool {
35        self.inner.insert(row_idx)
36    }
37
38    /// Mark multiple rows as deleted.
39    pub fn mark_deleted_batch(&mut self, row_indices: &[u32]) {
40        for &idx in row_indices {
41            self.inner.insert(idx);
42        }
43    }
44
45    /// Check whether a row is deleted.
46    pub fn is_deleted(&self, row_idx: u32) -> bool {
47        self.inner.contains(row_idx)
48    }
49
50    /// Number of deleted rows.
51    pub fn deleted_count(&self) -> u64 {
52        self.inner.len()
53    }
54
55    /// Whether the bitmap is empty (no deletions).
56    pub fn is_empty(&self) -> bool {
57        self.inner.is_empty()
58    }
59
60    /// Delete ratio: deleted_count / total_rows. Used to trigger compaction.
61    pub fn delete_ratio(&self, total_rows: u64) -> f64 {
62        if total_rows == 0 {
63            return 0.0;
64        }
65        self.inner.len() as f64 / total_rows as f64
66    }
67
68    /// Whether this segment should be compacted (delete ratio > threshold).
69    pub fn should_compact(&self, total_rows: u64, threshold: f64) -> bool {
70        self.delete_ratio(total_rows) > threshold
71    }
72
73    /// Check whether an entire block is fully deleted.
74    ///
75    /// `block_start` is the first row index of the block, `block_len` is
76    /// the number of rows. Returns true if every row in the range is deleted.
77    pub fn is_block_fully_deleted(&self, block_start: u32, block_len: u32) -> bool {
78        if block_len == 0 {
79            return true;
80        }
81        // Count deleted rows in the range [block_start, block_start + block_len).
82        let count = self
83            .inner
84            .range(block_start..block_start + block_len)
85            .count() as u32;
86        count == block_len
87    }
88
89    /// Apply the delete bitmap to a validity vector: set deleted rows to false.
90    ///
91    /// `global_offset` is the row index of the first element in `valid`
92    /// (used when processing a block that starts at a non-zero row).
93    pub fn apply_to_validity(&self, valid: &mut [bool], global_offset: u32) {
94        for (i, v) in valid.iter_mut().enumerate() {
95            if self.inner.contains(global_offset + i as u32) {
96                *v = false;
97            }
98        }
99    }
100
101    /// Serialize the bitmap to bytes for persistence.
102    pub fn to_bytes(&self) -> Result<Vec<u8>, ColumnarError> {
103        // no-governor: cold bitmap serialization; size = roaring bitmap serialized_size, governed at checkpoint
104        let mut buf = Vec::with_capacity(self.inner.serialized_size());
105        self.inner
106            .serialize_into(&mut buf)
107            .map_err(|e| ColumnarError::Serialization(format!("delete bitmap: {e}")))?;
108        Ok(buf)
109    }
110
111    /// Deserialize a bitmap from bytes.
112    pub fn from_bytes(data: &[u8]) -> Result<Self, ColumnarError> {
113        let inner = RoaringBitmap::deserialize_from(data)
114            .map_err(|e| ColumnarError::Serialization(format!("delete bitmap: {e}")))?;
115        Ok(Self { inner })
116    }
117
118    /// Iterate over all deleted row indices.
119    pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
120        self.inner.iter()
121    }
122
123    /// Remove a row from the deleted set (undo a tombstone).
124    ///
125    /// Used exclusively by transaction rollback to clear tombstones that
126    /// were set by a preceding insert's upsert path. Returns true if the
127    /// row was previously deleted, false if it was already live.
128    pub fn unmark_deleted(&mut self, row_idx: u32) -> bool {
129        self.inner.remove(row_idx)
130    }
131
132    /// Merge another bitmap into this one (union). Used during compaction
133    /// to combine delete bitmaps from multiple source segments.
134    pub fn merge(&mut self, other: &DeleteBitmap) {
135        self.inner |= &other.inner;
136    }
137}
138
139impl Default for DeleteBitmap {
140    fn default() -> Self {
141        Self::new()
142    }
143}
144
145#[cfg(test)]
146mod tests {
147    use super::*;
148
149    #[test]
150    fn mark_and_check() {
151        let mut bm = DeleteBitmap::new();
152        assert!(!bm.is_deleted(5));
153        assert!(bm.mark_deleted(5));
154        assert!(bm.is_deleted(5));
155        assert!(!bm.mark_deleted(5)); // Already deleted.
156        assert_eq!(bm.deleted_count(), 1);
157    }
158
159    #[test]
160    fn batch_delete() {
161        let mut bm = DeleteBitmap::new();
162        bm.mark_deleted_batch(&[1, 3, 5, 7, 9]);
163        assert_eq!(bm.deleted_count(), 5);
164        assert!(bm.is_deleted(3));
165        assert!(!bm.is_deleted(4));
166    }
167
168    #[test]
169    fn serialization_roundtrip() {
170        let mut bm = DeleteBitmap::new();
171        bm.mark_deleted_batch(&[0, 10, 100, 1000, 10000]);
172
173        let bytes = bm.to_bytes().unwrap();
174        let restored = DeleteBitmap::from_bytes(&bytes).expect("deserialize");
175
176        assert_eq!(restored.deleted_count(), 5);
177        assert!(restored.is_deleted(0));
178        assert!(restored.is_deleted(10));
179        assert!(restored.is_deleted(10000));
180        assert!(!restored.is_deleted(50));
181    }
182
183    #[test]
184    fn delete_ratio_and_compaction() {
185        let mut bm = DeleteBitmap::new();
186        for i in 0..30 {
187            bm.mark_deleted(i);
188        }
189        // 30 deleted out of 100 total = 0.3
190        assert!((bm.delete_ratio(100) - 0.3).abs() < 0.001);
191        // Default threshold 0.2 → should compact.
192        assert!(bm.should_compact(100, 0.2));
193        // Threshold 0.5 → should not compact.
194        assert!(!bm.should_compact(100, 0.5));
195    }
196
197    #[test]
198    fn block_fully_deleted() {
199        let mut bm = DeleteBitmap::new();
200        // Delete rows 10..20.
201        for i in 10..20 {
202            bm.mark_deleted(i);
203        }
204        assert!(bm.is_block_fully_deleted(10, 10));
205        assert!(!bm.is_block_fully_deleted(10, 11)); // Row 20 is not deleted.
206        assert!(!bm.is_block_fully_deleted(5, 10)); // Rows 5-9 are not deleted.
207    }
208
209    #[test]
210    fn apply_to_validity() {
211        let mut bm = DeleteBitmap::new();
212        bm.mark_deleted_batch(&[2, 4]);
213
214        let mut valid = vec![true, true, true, true, true];
215        bm.apply_to_validity(&mut valid, 0);
216
217        assert!(valid[0]);
218        assert!(valid[1]);
219        assert!(!valid[2]); // Deleted.
220        assert!(valid[3]);
221        assert!(!valid[4]); // Deleted.
222    }
223
224    #[test]
225    fn apply_to_validity_with_offset() {
226        let mut bm = DeleteBitmap::new();
227        bm.mark_deleted(1025); // Row in second block.
228
229        let mut valid = vec![true; 1024];
230        bm.apply_to_validity(&mut valid, 1024); // Block starts at row 1024.
231
232        assert!(valid[0]); // Global row 1024: not deleted.
233        assert!(!valid[1]); // Global row 1025: deleted.
234        assert!(valid[2]); // Global row 1026: not deleted.
235    }
236
237    #[test]
238    fn merge_bitmaps() {
239        let mut bm1 = DeleteBitmap::new();
240        bm1.mark_deleted_batch(&[1, 2, 3]);
241
242        let mut bm2 = DeleteBitmap::new();
243        bm2.mark_deleted_batch(&[3, 4, 5]);
244
245        bm1.merge(&bm2);
246        assert_eq!(bm1.deleted_count(), 5); // Union: {1,2,3,4,5}.
247    }
248
249    #[test]
250    fn empty_bitmap() {
251        let bm = DeleteBitmap::new();
252        assert!(bm.is_empty());
253        assert_eq!(bm.deleted_count(), 0);
254        assert!(!bm.is_deleted(0));
255
256        // Serialization roundtrip of empty bitmap.
257        let bytes = bm.to_bytes().unwrap();
258        let restored = DeleteBitmap::from_bytes(&bytes).expect("deserialize");
259        assert!(restored.is_empty());
260    }
261}