velesdb_core/column_store/
vacuum.rs1use super::types::{TypedColumn, VacuumConfig, VacuumStats};
7use super::ColumnStore;
8
9use roaring::RoaringBitmap;
10use std::collections::HashMap;
11
12impl ColumnStore {
13 pub fn vacuum(&mut self, _config: VacuumConfig) -> VacuumStats {
27 let start = std::time::Instant::now();
28 let tombstones_found = self.deleted_rows.len();
29
30 if tombstones_found == 0 {
32 return VacuumStats {
33 tombstones_found: 0,
34 completed: true,
35 duration_ms: start.elapsed().as_millis() as u64,
36 ..Default::default()
37 };
38 }
39
40 let mut stats = VacuumStats {
41 tombstones_found,
42 ..Default::default()
43 };
44
45 let mut old_to_new: HashMap<usize, usize> = HashMap::new();
47 let mut new_idx = 0;
48 for old_idx in 0..self.row_count {
49 if !self.deleted_rows.contains(&old_idx) {
50 old_to_new.insert(old_idx, new_idx);
51 new_idx += 1;
52 }
53 }
54 let new_row_count = new_idx;
55
56 for column in self.columns.values_mut() {
58 let (new_col, bytes) = Self::compact_column(column, &self.deleted_rows);
59 stats.bytes_reclaimed += bytes;
60 *column = new_col;
61 }
62
63 if self.primary_key_column.is_some() {
65 let mut new_primary_index: HashMap<i64, usize> = HashMap::new();
66 let mut new_row_idx_to_pk: HashMap<usize, i64> = HashMap::new();
67
68 for (pk, old_idx) in &self.primary_index {
69 if let Some(&new_idx) = old_to_new.get(old_idx) {
70 new_primary_index.insert(*pk, new_idx);
71 new_row_idx_to_pk.insert(new_idx, *pk);
72 }
73 }
74
75 self.primary_index = new_primary_index;
76 self.row_idx_to_pk = new_row_idx_to_pk;
77 }
78
79 let mut new_row_expiry: HashMap<usize, u64> = HashMap::new();
81 for (old_idx, expiry) in &self.row_expiry {
82 if let Some(&new_idx) = old_to_new.get(old_idx) {
83 new_row_expiry.insert(new_idx, *expiry);
84 }
85 }
86 self.row_expiry = new_row_expiry;
87
88 stats.tombstones_removed = self.deleted_rows.len();
90 self.deleted_rows.clear();
91 self.deletion_bitmap.clear(); self.row_count = new_row_count;
93
94 stats.completed = true;
95 stats.duration_ms = start.elapsed().as_millis() as u64;
96 stats
97 }
98
99 fn compact_column(
101 column: &TypedColumn,
102 deleted: &rustc_hash::FxHashSet<usize>,
103 ) -> (TypedColumn, u64) {
104 let mut bytes_reclaimed = 0u64;
105
106 match column {
107 TypedColumn::Int(data) => {
108 let mut new_data = Vec::with_capacity(data.len() - deleted.len());
109 for (idx, value) in data.iter().enumerate() {
110 if deleted.contains(&idx) {
111 bytes_reclaimed += 8; } else {
113 new_data.push(*value);
114 }
115 }
116 (TypedColumn::Int(new_data), bytes_reclaimed)
117 }
118 TypedColumn::Float(data) => {
119 let mut new_data = Vec::with_capacity(data.len() - deleted.len());
120 for (idx, value) in data.iter().enumerate() {
121 if deleted.contains(&idx) {
122 bytes_reclaimed += 8; } else {
124 new_data.push(*value);
125 }
126 }
127 (TypedColumn::Float(new_data), bytes_reclaimed)
128 }
129 TypedColumn::String(data) => {
130 let mut new_data = Vec::with_capacity(data.len() - deleted.len());
131 for (idx, value) in data.iter().enumerate() {
132 if deleted.contains(&idx) {
133 bytes_reclaimed += 4; } else {
135 new_data.push(*value);
136 }
137 }
138 (TypedColumn::String(new_data), bytes_reclaimed)
139 }
140 TypedColumn::Bool(data) => {
141 let mut new_data = Vec::with_capacity(data.len() - deleted.len());
142 for (idx, value) in data.iter().enumerate() {
143 if deleted.contains(&idx) {
144 bytes_reclaimed += 1; } else {
146 new_data.push(*value);
147 }
148 }
149 (TypedColumn::Bool(new_data), bytes_reclaimed)
150 }
151 }
152 }
153
154 #[must_use]
160 pub fn should_vacuum(&self, threshold: f64) -> bool {
161 if self.row_count == 0 {
162 return false;
163 }
164 let ratio = self.deleted_rows.len() as f64 / self.row_count as f64;
165 ratio >= threshold
166 }
167
168 #[must_use]
176 #[inline]
177 pub fn is_row_deleted_bitmap(&self, row_idx: usize) -> bool {
178 if let Ok(idx) = u32::try_from(row_idx) {
179 self.deletion_bitmap.contains(idx)
180 } else {
181 self.deleted_rows.contains(&row_idx)
183 }
184 }
185
186 pub fn live_row_indices(&self) -> impl Iterator<Item = usize> + '_ {
190 (0..self.row_count).filter(|&idx| !self.is_row_deleted_bitmap(idx))
191 }
192
193 #[must_use]
195 pub fn deletion_bitmap(&self) -> &RoaringBitmap {
196 &self.deletion_bitmap
197 }
198
199 #[must_use]
201 pub fn deleted_count_bitmap(&self) -> u64 {
202 self.deletion_bitmap.len()
203 }
204}