fjall/gc/
mod.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::PartitionHandle;
6use lsm_tree::{gc::Report as GcReport, AnyTree};
7
8/// Functions for garbage collection strategies
9///
10/// These functions are to be used with a key-value separated partition.
pub trait GarbageCollection {
    /// Collects statistics about blob fragmentation inside the partition.
    ///
    /// The returned report is what the examples on the other methods of this
    /// trait inspect before and after running a garbage collection strategy.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    ///
    /// # Panics
    ///
    /// Panics if the partition is not KV-separated.
    fn gc_scan(&self) -> crate::Result<GcReport>;

    /// Rewrites blobs in order to achieve the given space amplification factor.
    ///
    /// `factor` is the space amplification target to aim for
    /// (the example below uses `1.5`).
    ///
    /// # Examples
    ///
    /// ```
    /// # use fjall::{Config, GarbageCollection, PersistMode, Keyspace, PartitionCreateOptions};
    /// # let folder = tempfile::tempdir()?;
    /// # let keyspace = Config::new(folder).open()?;
    /// let opts = PartitionCreateOptions::default().with_kv_separation(Default::default());
    /// let blobs = keyspace.open_partition("my_blobs", opts)?;
    ///
    /// blobs.insert("a", "hello".repeat(1_000))?;
    /// blobs.insert("b", "hello".repeat(1_000))?;
    /// blobs.insert("c", "hello".repeat(1_000))?;
    /// blobs.insert("d", "hello".repeat(1_000))?;
    /// blobs.insert("e", "hello".repeat(1_000))?;
    /// # blobs.rotate_memtable_and_wait()?;
    /// blobs.remove("a")?;
    /// blobs.remove("b")?;
    /// blobs.remove("c")?;
    /// blobs.remove("d")?;
    ///
    /// let report = blobs.gc_scan()?;
    /// # // assert_eq!(0.8, report.stale_ratio());
    /// # // assert_eq!(5.0, report.space_amp());
    /// # // assert_eq!(5, report.total_blobs);
    /// # // assert_eq!(4, report.stale_blobs);
    /// # // assert_eq!(0, report.stale_segment_count);
    /// # // assert_eq!(1, report.segment_count);
    ///
    /// let bytes_freed = blobs.gc_with_space_amp_target(1.5)?;
    /// # // assert!(bytes_freed >= 0);
    ///
    /// let report = blobs.gc_scan()?;
    /// # // assert_eq!(0.0, report.stale_ratio());
    /// # // assert_eq!(1.0, report.space_amp());
    /// # // assert_eq!(1, report.total_blobs);
    /// # // assert_eq!(0, report.stale_blobs);
    /// # // assert_eq!(0, report.stale_segment_count);
    /// # // assert_eq!(1, report.segment_count);
    /// #
    /// # Ok::<_, fjall::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    ///
    /// # Panics
    ///
    /// Panics if the partition is not KV-separated.
    fn gc_with_space_amp_target(&self, factor: f32) -> crate::Result<u64>;

    /// Rewrites blobs that have reached a given staleness threshold.
    ///
    /// `threshold` is compared against staleness as a ratio
    /// (the example below uses `0.5`; presumably in the same unit as the
    /// report's `stale_ratio()` - TODO confirm).
    /// Values above 1.0 will be treated as 1.0.
    ///
    /// If you want to drop only fully stale segments, use
    /// [`GarbageCollection::gc_drop_stale_segments`] instead.
    ///
    /// # Examples
    ///
    /// ```
    /// # use fjall::{Config, GarbageCollection, PersistMode, Keyspace, PartitionCreateOptions};
    /// # let folder = tempfile::tempdir()?;
    /// # let keyspace = Config::new(folder).open()?;
    /// let opts = PartitionCreateOptions::default().with_kv_separation(Default::default());
    /// let blobs = keyspace.open_partition("my_blobs", opts)?;
    ///
    /// blobs.insert("a", "hello".repeat(1_000))?;
    /// blobs.insert("b", "hello".repeat(1_000))?;
    /// blobs.insert("c", "hello".repeat(1_000))?;
    /// blobs.insert("d", "hello".repeat(1_000))?;
    /// blobs.insert("e", "hello".repeat(1_000))?;
    /// # blobs.rotate_memtable_and_wait()?;
    /// blobs.remove("a")?;
    /// blobs.remove("b")?;
    /// blobs.remove("c")?;
    /// blobs.remove("d")?;
    ///
    /// let report = blobs.gc_scan()?;
    /// # // assert_eq!(0.8, report.stale_ratio());
    /// # // assert_eq!(5.0, report.space_amp());
    /// # // assert_eq!(5, report.total_blobs);
    /// # // assert_eq!(4, report.stale_blobs);
    /// # // assert_eq!(0, report.stale_segment_count);
    /// # // assert_eq!(1, report.segment_count);
    ///
    /// let bytes_freed = blobs.gc_with_staleness_threshold(0.5)?;
    /// # // assert!(bytes_freed >= 0);
    ///
    /// let report = blobs.gc_scan()?;
    /// # // assert_eq!(0.0, report.stale_ratio());
    /// # // assert_eq!(1.0, report.space_amp());
    /// # // assert_eq!(1, report.total_blobs);
    /// # // assert_eq!(0, report.stale_blobs);
    /// # // assert_eq!(0, report.stale_segment_count);
    /// # // assert_eq!(1, report.segment_count);
    /// #
    /// # Ok::<_, fjall::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    ///
    /// # Panics
    ///
    /// Panics if the partition is not KV-separated.
    ///
    /// Panics if the threshold is negative.
    fn gc_with_staleness_threshold(&self, threshold: f32) -> crate::Result<u64>;

    /// Drops fully stale segments.
    ///
    /// This is called implicitly by other garbage collection strategies.
    ///
    /// # Examples
    ///
    /// ```
    /// # use fjall::{Config, GarbageCollection, PersistMode, Keyspace, PartitionCreateOptions};
    /// # let folder = tempfile::tempdir()?;
    /// # let keyspace = Config::new(folder).open()?;
    /// let opts = PartitionCreateOptions::default().with_kv_separation(Default::default());
    /// let blobs = keyspace.open_partition("my_blobs", opts)?;
    ///
    /// blobs.insert("a", "hello".repeat(1_000))?;
    /// assert!(blobs.contains_key("a")?);
    ///
    /// # blobs.rotate_memtable_and_wait()?;
    /// blobs.remove("a")?;
    /// assert!(!blobs.contains_key("a")?);
    ///
    /// let report = blobs.gc_scan()?;
    /// # // assert_eq!(1.0, report.stale_ratio());
    /// # // assert_eq!(1, report.stale_blobs);
    /// # // assert_eq!(1, report.stale_segment_count);
    /// # // assert_eq!(1, report.segment_count);
    ///
    /// let bytes_freed = blobs.gc_drop_stale_segments()?;
    /// # // assert!(bytes_freed >= 0);
    ///
    /// let report = blobs.gc_scan()?;
    /// # // assert_eq!(0.0, report.stale_ratio());
    /// # // assert_eq!(0, report.stale_blobs);
    /// # // assert_eq!(0, report.stale_segment_count);
    /// # // assert_eq!(0, report.segment_count);
    /// #
    /// # Ok::<_, fjall::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    ///
    /// # Panics
    ///
    /// Panics if the partition is not KV-separated.
    fn gc_drop_stale_segments(&self) -> crate::Result<u64>;
}
181
/// Stateless namespace for the garbage collection routines.
///
/// All functionality lives in associated functions that take the
/// [`PartitionHandle`] to operate on as their first argument.
pub struct GarbageCollector;
183
184impl GarbageCollector {
185    pub fn scan(partition: &PartitionHandle) -> crate::Result<GcReport> {
186        if let AnyTree::Blob(tree) = &partition.tree {
187            return tree
188                .gc_scan_stats(
189                    partition.seqno.get(),
190                    partition.snapshot_tracker.get_seqno_safe_to_gc(),
191                )
192                .map_err(Into::into);
193        }
194        panic!("Cannot use GC for non-KV-separated tree");
195    }
196
197    pub fn with_space_amp_target(partition: &PartitionHandle, factor: f32) -> crate::Result<u64> {
198        if let AnyTree::Blob(tree) = &partition.tree {
199            let strategy = lsm_tree::gc::SpaceAmpStrategy::new(factor);
200
201            tree.apply_gc_strategy(&strategy, partition.seqno.next())
202                .map_err(Into::into)
203        } else {
204            panic!("Cannot use GC for non-KV-separated tree");
205        }
206    }
207
208    pub fn with_staleness_threshold(
209        partition: &PartitionHandle,
210        threshold: f32,
211    ) -> crate::Result<u64> {
212        if let AnyTree::Blob(tree) = &partition.tree {
213            let strategy = lsm_tree::gc::StaleThresholdStrategy::new(threshold);
214
215            return tree
216                .apply_gc_strategy(&strategy, partition.seqno.next())
217                .map_err(Into::into);
218        }
219        panic!("Cannot use GC for non-KV-separated tree");
220    }
221
222    pub fn drop_stale_segments(partition: &PartitionHandle) -> crate::Result<u64> {
223        if let AnyTree::Blob(tree) = &partition.tree {
224            return tree.gc_drop_stale().map_err(Into::into);
225        }
226        panic!("Cannot use GC for non-KV-separated tree");
227    }
228}