fjall/gc/mod.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::PartitionHandle;
6use lsm_tree::{gc::Report as GcReport, AnyTree};
7
8/// Functions for garbage collection strategies
9///
10/// These functions are to be used with a key-value separated partition.
11pub trait GarbageCollection {
12 /// Collects statistics about blob fragmentation inside the partition.
13 ///
14 /// # Errors
15 ///
16 /// Will return `Err` if an IO error occurs.
17 ///
18 /// # Panics
19 ///
20 /// Panics if the partition is not KV-separated.
21 fn gc_scan(&self) -> crate::Result<GcReport>;
22
23 /// Rewrites blobs in order to achieve the given space amplification factor.
24 ///
25 /// # Examples
26 ///
27 /// ```
28 /// # use fjall::{Config, GarbageCollection, PersistMode, Keyspace, PartitionCreateOptions};
29 /// # let folder = tempfile::tempdir()?;
30 /// # let keyspace = Config::new(folder).open()?;
31 /// let opts = PartitionCreateOptions::default().with_kv_separation(Default::default());
32 /// let blobs = keyspace.open_partition("my_blobs", opts)?;
33 ///
34 /// blobs.insert("a", "hello".repeat(1_000))?;
35 /// blobs.insert("b", "hello".repeat(1_000))?;
36 /// blobs.insert("c", "hello".repeat(1_000))?;
37 /// blobs.insert("d", "hello".repeat(1_000))?;
38 /// blobs.insert("e", "hello".repeat(1_000))?;
39 /// # blobs.rotate_memtable_and_wait()?;
40 /// blobs.remove("a")?;
41 /// blobs.remove("b")?;
42 /// blobs.remove("c")?;
43 /// blobs.remove("d")?;
44 ///
45 /// let report = blobs.gc_scan()?;
46 /// # // assert_eq!(0.8, report.stale_ratio());
47 /// # // assert_eq!(5.0, report.space_amp());
48 /// # // assert_eq!(5, report.total_blobs);
49 /// # // assert_eq!(4, report.stale_blobs);
50 /// # // assert_eq!(0, report.stale_segment_count);
51 /// # // assert_eq!(1, report.segment_count);
52 ///
53 /// let bytes_freed = blobs.gc_with_space_amp_target(1.5)?;
54 /// # // assert!(bytes_freed >= 0);
55 ///
56 /// let report = blobs.gc_scan()?;
57 /// # // assert_eq!(0.0, report.stale_ratio());
58 /// # // assert_eq!(1.0, report.space_amp());
59 /// # // assert_eq!(1, report.total_blobs);
60 /// # // assert_eq!(0, report.stale_blobs);
61 /// # // assert_eq!(0, report.stale_segment_count);
62 /// # // assert_eq!(1, report.segment_count);
63 /// #
64 /// # Ok::<_, fjall::Error>(())
65 /// ```
66 ///
67 /// # Errors
68 ///
69 /// Will return `Err` if an IO error occurs.
70 ///
71 /// # Panics
72 ///
73 /// Panics if the partition is not KV-separated.
74 fn gc_with_space_amp_target(&self, factor: f32) -> crate::Result<u64>;
75
76 /// Rewrites blobs that have reached a given staleness threshold.
77 ///
78 /// # Examples
79 ///
80 /// ```
81 /// # use fjall::{Config, GarbageCollection, PersistMode, Keyspace, PartitionCreateOptions};
82 /// # let folder = tempfile::tempdir()?;
83 /// # let keyspace = Config::new(folder).open()?;
84 /// let opts = PartitionCreateOptions::default().with_kv_separation(Default::default());
85 /// let blobs = keyspace.open_partition("my_blobs", opts)?;
86 ///
87 /// blobs.insert("a", "hello".repeat(1_000))?;
88 /// blobs.insert("b", "hello".repeat(1_000))?;
89 /// blobs.insert("c", "hello".repeat(1_000))?;
90 /// blobs.insert("d", "hello".repeat(1_000))?;
91 /// blobs.insert("e", "hello".repeat(1_000))?;
92 /// # blobs.rotate_memtable_and_wait()?;
93 /// blobs.remove("a")?;
94 /// blobs.remove("b")?;
95 /// blobs.remove("c")?;
96 /// blobs.remove("d")?;
97 ///
98 /// let report = blobs.gc_scan()?;
99 /// # // assert_eq!(0.8, report.stale_ratio());
100 /// # // assert_eq!(5.0, report.space_amp());
101 /// # // assert_eq!(5, report.total_blobs);
102 /// # // assert_eq!(4, report.stale_blobs);
103 /// # // assert_eq!(0, report.stale_segment_count);
104 /// # // assert_eq!(1, report.segment_count);
105 ///
106 /// let bytes_freed = blobs.gc_with_staleness_threshold(0.5)?;
107 /// # // assert!(bytes_freed >= 0);
108 ///
109 /// let report = blobs.gc_scan()?;
110 /// # // assert_eq!(0.0, report.stale_ratio());
111 /// # // assert_eq!(1.0, report.space_amp());
112 /// # // assert_eq!(1, report.total_blobs);
113 /// # // assert_eq!(0, report.stale_blobs);
114 /// # // assert_eq!(0, report.stale_segment_count);
115 /// # // assert_eq!(1, report.segment_count);
116 /// #
117 /// # Ok::<_, fjall::Error>(())
118 /// ```
119 ///
120 /// # Errors
121 ///
122 /// Will return `Err` if an IO error occurs.
123 ///
124 /// # Panics
125 ///
126 /// Panics if the partition is not KV-separated.
127 ///
128 /// Panics if the threshold is negative.
129 ///
130 /// Values above 1.0 will be treated as 1.0.
131 /// If you want to drop only fully stale segments, use [`GarbageCollector::drop_stale_segments`] instead.
132 fn gc_with_staleness_threshold(&self, threshold: f32) -> crate::Result<u64>;
133
134 /// Drops fully stale segments.
135 ///
136 /// This is called implicitly by other garbage collection strategies.
137 ///
138 /// # Examples
139 ///
140 /// ```
141 /// # use fjall::{Config, GarbageCollection, PersistMode, Keyspace, PartitionCreateOptions};
142 /// # let folder = tempfile::tempdir()?;
143 /// # let keyspace = Config::new(folder).open()?;
144 /// let opts = PartitionCreateOptions::default().with_kv_separation(Default::default());
145 /// let blobs = keyspace.open_partition("my_blobs", opts)?;
146 ///
147 /// blobs.insert("a", "hello".repeat(1_000))?;
148 /// assert!(blobs.contains_key("a")?);
149 ///
150 /// # blobs.rotate_memtable_and_wait()?;
151 /// blobs.remove("a")?;
152 /// assert!(!blobs.contains_key("a")?);
153 ///
154 /// let report = blobs.gc_scan()?;
155 /// # // assert_eq!(1.0, report.stale_ratio());
156 /// # // assert_eq!(1, report.stale_blobs);
157 /// # // assert_eq!(1, report.stale_segment_count);
158 /// # // assert_eq!(1, report.segment_count);
159 ///
160 /// let bytes_freed = blobs.gc_drop_stale_segments()?;
161 /// # // assert!(bytes_freed >= 0);
162 ///
163 /// let report = blobs.gc_scan()?;
164 /// # // assert_eq!(0.0, report.stale_ratio());
165 /// # // assert_eq!(0, report.stale_blobs);
166 /// # // assert_eq!(0, report.stale_segment_count);
167 /// # // assert_eq!(0, report.segment_count);
168 /// #
169 /// # Ok::<_, fjall::Error>(())
170 /// ```
171 ///
172 /// # Errors
173 ///
174 /// Will return `Err` if an IO error occurs.
175 ///
176 /// # Panics
177 ///
178 /// Panics if the partition is not KV-separated.
179 fn gc_drop_stale_segments(&self) -> crate::Result<u64>;
180}
181
182pub struct GarbageCollector;
183
184impl GarbageCollector {
185 pub fn scan(partition: &PartitionHandle) -> crate::Result<GcReport> {
186 if let AnyTree::Blob(tree) = &partition.tree {
187 return tree
188 .gc_scan_stats(
189 partition.seqno.get(),
190 partition.snapshot_tracker.get_seqno_safe_to_gc(),
191 )
192 .map_err(Into::into);
193 }
194 panic!("Cannot use GC for non-KV-separated tree");
195 }
196
197 pub fn with_space_amp_target(partition: &PartitionHandle, factor: f32) -> crate::Result<u64> {
198 if let AnyTree::Blob(tree) = &partition.tree {
199 let strategy = lsm_tree::gc::SpaceAmpStrategy::new(factor);
200
201 tree.apply_gc_strategy(&strategy, partition.seqno.next())
202 .map_err(Into::into)
203 } else {
204 panic!("Cannot use GC for non-KV-separated tree");
205 }
206 }
207
208 pub fn with_staleness_threshold(
209 partition: &PartitionHandle,
210 threshold: f32,
211 ) -> crate::Result<u64> {
212 if let AnyTree::Blob(tree) = &partition.tree {
213 let strategy = lsm_tree::gc::StaleThresholdStrategy::new(threshold);
214
215 return tree
216 .apply_gc_strategy(&strategy, partition.seqno.next())
217 .map_err(Into::into);
218 }
219 panic!("Cannot use GC for non-KV-separated tree");
220 }
221
222 pub fn drop_stale_segments(partition: &PartitionHandle) -> crate::Result<u64> {
223 if let AnyTree::Blob(tree) = &partition.tree {
224 return tree.gc_drop_stale().map_err(Into::into);
225 }
226 panic!("Cannot use GC for non-KV-separated tree");
227 }
228}