1use crate::document::Document;
2use crate::rga::ItemState;
3use crate::stroke::StrokePoint;
4use crate::types::OpId;
5use std::collections::{HashMap, HashSet};
6
7pub struct GcConfig {
9 pub tombstone_ratio_threshold: f64,
11 pub tombstone_count_threshold: usize,
13 pub max_gc_per_cycle: usize,
15}
16
17impl Default for GcConfig {
18 fn default() -> Self {
19 Self {
20 tombstone_ratio_threshold: 0.30,
21 tombstone_count_threshold: 10_000,
22 max_gc_per_cycle: 5_000,
23 }
24 }
25}
26
27#[derive(Debug, Clone)]
29pub struct GcResult {
30 pub tombstones_removed: usize,
31 pub bytes_freed_estimate: usize,
32 pub generation: u64,
33 pub partial: bool,
35}
36
37fn find_kept_ancestor(
46 mut origin: OpId,
47 remove_set: &HashSet<OpId>,
48 items: &[crate::rga::RgaItem],
49 index: &HashMap<OpId, usize>,
50) -> OpId {
51 const MAX_DEPTH: usize = 10_000; for _ in 0..MAX_DEPTH {
53 if !remove_set.contains(&origin) {
54 return origin; }
56 match index.get(&origin) {
57 Some(&pos) => origin = items[pos].origin_left,
58 None => return OpId::ZERO, }
60 }
61 OpId::ZERO }
63
64impl Document {
65 pub fn should_gc(&self, config: &GcConfig) -> bool {
67 self.stroke_order.tombstone_ratio() > config.tombstone_ratio_threshold
68 || self.stroke_order.tombstone_count >= config.tombstone_count_threshold
69 }
70
71 pub fn run_gc(&mut self, config: &GcConfig) -> GcResult {
94 let mut removed = 0;
95 let mut bytes_freed: usize = 0;
96 let mut ids_to_remove: Vec<OpId> = Vec::new();
97 let mut partial = false;
98
99 for item in self.stroke_order.items.iter() {
101 if removed >= config.max_gc_per_cycle {
102 partial = true;
103 break;
104 }
105
106 if let ItemState::Tombstone { deleted_at } = &item.state {
107 let is_stable = self.min_version.get(deleted_at.actor)
108 >= deleted_at.lamport.0;
109
110 if is_stable {
111 ids_to_remove.push(item.id);
112 removed += 1;
113
114 bytes_freed += std::mem::size_of::<crate::rga::RgaItem>();
116 bytes_freed += 80; if let Some((data, _)) = self.stroke_store.strokes.get(&item.content) {
118 bytes_freed += data.points.len() * std::mem::size_of::<StrokePoint>();
119 bytes_freed += 128; }
121 }
122 }
123 }
124
125 if ids_to_remove.is_empty() {
126 self.gc_generation += 1;
127 return GcResult {
128 tombstones_removed: 0,
129 bytes_freed_estimate: 0,
130 generation: self.gc_generation,
131 partial,
132 };
133 }
134
135 let remove_set: HashSet<OpId> = ids_to_remove.iter().copied().collect();
136
137 let reparents: Vec<(OpId, OpId, OpId)> = self
146 .stroke_order
147 .items
148 .iter()
149 .filter(|item| !remove_set.contains(&item.id)) .filter(|item| {
151 remove_set.contains(&item.origin_left)
152 || (!item.origin_right.is_zero() && remove_set.contains(&item.origin_right))
153 })
154 .map(|item| {
155 let new_ol = if remove_set.contains(&item.origin_left) {
156 find_kept_ancestor(
157 item.origin_left,
158 &remove_set,
159 &self.stroke_order.items,
160 &self.stroke_order.index,
161 )
162 } else {
163 item.origin_left
164 };
165 let new_or = if !item.origin_right.is_zero()
166 && remove_set.contains(&item.origin_right)
167 {
168 find_kept_ancestor(
169 item.origin_right,
170 &remove_set,
171 &self.stroke_order.items,
172 &self.stroke_order.index,
173 )
174 } else {
175 item.origin_right
176 };
177 (item.id, new_ol, new_or)
178 })
179 .collect();
180
181 for (id, new_ol, new_or) in reparents {
183 if let Some(&pos) = self.stroke_order.index.get(&id) {
184 self.stroke_order.items[pos].origin_left = new_ol;
185 self.stroke_order.items[pos].origin_right = new_or;
186 }
187 }
188
189 for id in &ids_to_remove {
191 self.stroke_store.remove(id);
192 }
193
194 self.stroke_order.items.retain(|item| !remove_set.contains(&item.id));
195
196 self.stroke_order.rebuild_index();
198
199 self.stroke_order.tombstone_count =
200 self.stroke_order.tombstone_count.saturating_sub(removed);
201 self.stroke_order.total_count =
202 self.stroke_order.total_count.saturating_sub(removed);
203
204 self.gc_generation += 1;
205
206 GcResult {
207 tombstones_removed: removed,
208 bytes_freed_estimate: bytes_freed,
209 generation: self.gc_generation,
210 partial,
211 }
212 }
213
214 pub fn update_min_version(
216 &mut self,
217 mvv: crate::types::VectorClock,
218 config: &GcConfig,
219 ) -> Option<GcResult> {
220 self.min_version = mvv;
221 if self.should_gc(config) {
222 Some(self.run_gc(config))
223 } else {
224 None
225 }
226 }
227}
228
229#[cfg(test)]
232mod tests {
233 use super::*;
234 use crate::document::Document;
235 use crate::stroke::{StrokeData, StrokePoint, StrokeProperties, ToolKind};
236 use crate::types::{ActorId, OpId, VectorClock};
237
238 fn simple_doc() -> Document {
239 let mut doc = Document::new(ActorId(1));
240 doc.simplify_epsilon = 0.0; doc
242 }
243
244 fn simple_stroke() -> (StrokeData, StrokeProperties) {
245 let pts: Box<[StrokePoint]> = vec![StrokePoint::basic(0.0, 0.0)].into();
246 let data = StrokeData::new(pts, ToolKind::Pen);
247 let props = StrokeProperties::new(0xFF0000FF, 2.0, 1.0, OpId::ZERO);
248 (data, props)
249 }
250
251 fn all_stable_mvv() -> VectorClock {
252 let mut mvv = VectorClock::new();
253 mvv.advance(ActorId(1), 1_000_000); mvv
255 }
256
257 fn aggressive_gc() -> GcConfig {
258 GcConfig {
259 tombstone_ratio_threshold: 0.0,
260 tombstone_count_threshold: 0,
261 max_gc_per_cycle: 100_000,
262 }
263 }
264
265 #[test]
266 fn gc_removes_stable_tombstones() {
267 let mut doc = simple_doc();
268 let (data, props) = simple_stroke();
269 let id = doc.insert_stroke(data, props);
270
271 doc.delete_stroke(id);
272 assert_eq!(doc.stroke_order.tombstone_count, 1);
273
274 doc.min_version = all_stable_mvv();
275 let result = doc.run_gc(&aggressive_gc());
276
277 assert_eq!(result.tombstones_removed, 1);
278 assert_eq!(doc.stroke_order.tombstone_count, 0);
279 assert_eq!(doc.stroke_order.total_count, 0);
280 }
281
282 #[test]
283 fn gc_does_not_remove_unstable_tombstones() {
284 let mut doc = simple_doc();
285 let (data, props) = simple_stroke();
286 let id = doc.insert_stroke(data, props);
287 doc.delete_stroke(id);
288
289 doc.min_version = VectorClock::new(); let result = doc.run_gc(&aggressive_gc());
292
293 assert_eq!(result.tombstones_removed, 0);
294 assert_eq!(doc.stroke_order.tombstone_count, 1);
295 }
296
297 #[test]
304 fn gc_reparents_surviving_origin_references() {
305 use crate::encoding::{decode_snapshot, encode_snapshot};
306
307 let mut doc = simple_doc();
308 doc.simplify_epsilon = 0.0;
309
310 let (da, pa) = simple_stroke();
312 let id_a = doc.insert_stroke(da, pa);
313
314 let (db, pb) = simple_stroke();
315 let id_b = doc.insert_stroke(db, pb);
316
317 let (dc, pc) = simple_stroke();
318 let id_c = doc.insert_stroke(dc, pc);
319
320 doc.delete_stroke(id_b);
322
323 assert_eq!(doc.visible_stroke_ids(), vec![id_a, id_c]);
326
327 doc.min_version = all_stable_mvv();
330 let result = doc.run_gc(&aggressive_gc());
331 assert_eq!(result.tombstones_removed, 1);
332
333 assert_eq!(doc.visible_stroke_ids(), vec![id_a, id_c]);
335
336 let snapshot = encode_snapshot(&doc);
338 let doc2 = decode_snapshot(&snapshot, ActorId(99)).expect("snapshot decode failed");
339
340 let visible2 = doc2.visible_stroke_ids();
342 assert_eq!(visible2.len(), 2);
343 assert_eq!(visible2[0], id_a, "A must remain before C");
344 assert_eq!(visible2[1], id_c, "C must follow A after reconstruction");
345 }
346
347 #[test]
348 fn gc_reparent_chain_multiple_hops() {
349 use crate::encoding::{decode_snapshot, encode_snapshot};
352
353 let mut doc = simple_doc();
354 doc.simplify_epsilon = 0.0;
355
356 let (da, pa) = simple_stroke();
357 let id_a = doc.insert_stroke(da, pa);
358 let (db, pb) = simple_stroke();
359 let id_b = doc.insert_stroke(db, pb);
360 let (dc, pc) = simple_stroke();
361 let id_c = doc.insert_stroke(dc, pc);
362 let (dd, pd) = simple_stroke();
363 let id_d = doc.insert_stroke(dd, pd);
364
365 doc.delete_stroke(id_b);
366 doc.delete_stroke(id_c);
367
368 assert_eq!(doc.visible_stroke_ids(), vec![id_a, id_d]);
369
370 doc.min_version = all_stable_mvv();
371 let result = doc.run_gc(&aggressive_gc());
372 assert_eq!(result.tombstones_removed, 2);
373 assert_eq!(doc.visible_stroke_ids(), vec![id_a, id_d]);
374
375 let snapshot = encode_snapshot(&doc);
376 let doc2 = decode_snapshot(&snapshot, ActorId(99)).unwrap();
377 let visible2 = doc2.visible_stroke_ids();
378 assert_eq!(visible2, vec![id_a, id_d]);
379 }
380}