value_log/gc/
mod.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5pub mod report;
6
7use crate::{id::SegmentId, Compressor, ValueLog};
8
9/// GC strategy
10#[allow(clippy::module_name_repetitions)]
11pub trait GcStrategy<C: Compressor + Clone> {
12    /// Picks segments based on a predicate.
13    fn pick(&self, value_log: &ValueLog<C>) -> Vec<SegmentId>;
14}
15
16/// Picks segments that have a certain percentage of stale blobs
17pub struct StaleThresholdStrategy(f32);
18
19impl StaleThresholdStrategy {
20    /// Creates a new strategy with the given threshold.
21    ///
22    /// # Panics
23    ///
24    /// Panics if the ratio is invalid.
25    #[must_use]
26    pub fn new(ratio: f32) -> Self {
27        assert!(
28            ratio.is_finite() && ratio.is_sign_positive(),
29            "invalid stale ratio"
30        );
31        Self(ratio.min(1.0))
32    }
33}
34
35impl<C: Compressor + Clone> GcStrategy<C> for StaleThresholdStrategy {
36    fn pick(&self, value_log: &ValueLog<C>) -> Vec<SegmentId> {
37        value_log
38            .manifest
39            .segments
40            .read()
41            .expect("lock is poisoned")
42            .values()
43            .filter(|x| x.stale_ratio() > self.0)
44            .map(|x| x.id)
45            .collect::<Vec<_>>()
46    }
47}
48
49/// Tries to find a least-effort-selection of segments to merge to reach a certain space amplification
50pub struct SpaceAmpStrategy(f32);
51
52impl SpaceAmpStrategy {
53    /// Creates a new strategy with the given space amp factor.
54    ///
55    /// # Panics
56    ///
57    /// Panics if the space amp factor is < 1.0.
58    #[must_use]
59    pub fn new(ratio: f32) -> Self {
60        assert!(ratio >= 1.0, "invalid space amp ratio");
61        Self(ratio)
62    }
63}
64
65impl<C: Compressor + Clone> GcStrategy<C> for SpaceAmpStrategy {
66    #[allow(clippy::cast_precision_loss, clippy::significant_drop_tightening)]
67    fn pick(&self, value_log: &ValueLog<C>) -> Vec<SegmentId> {
68        let space_amp_target = self.0;
69        let current_space_amp = value_log.space_amp();
70
71        if current_space_amp < space_amp_target {
72            log::trace!("Space amp is <= target {space_amp_target}, nothing to do");
73            vec![]
74        } else {
75            log::debug!("Selecting segments to GC, space_amp_target={space_amp_target}");
76
77            let lock = value_log
78                .manifest
79                .segments
80                .read()
81                .expect("lock is poisoned");
82
83            let mut segments = lock
84                .values()
85                .filter(|x| x.stale_ratio() > 0.0)
86                .collect::<Vec<_>>();
87
88            // Sort by stale ratio descending
89            segments.sort_by(|a, b| {
90                b.stale_ratio()
91                    .partial_cmp(&a.stale_ratio())
92                    .unwrap_or(std::cmp::Ordering::Equal)
93            });
94
95            let mut selection = vec![];
96
97            let mut total_bytes = value_log.manifest.total_bytes();
98            let mut stale_bytes = value_log.manifest.stale_bytes();
99
100            for segment in segments {
101                let segment_stale_bytes = segment.gc_stats.stale_bytes();
102                stale_bytes -= segment_stale_bytes;
103                total_bytes -= segment_stale_bytes;
104
105                selection.push(segment.id);
106
107                let space_amp_after_gc =
108                    total_bytes as f32 / (total_bytes as f32 - stale_bytes as f32);
109
110                log::debug!(
111                    "Selected segment #{} for GC: will reduce space amp to {space_amp_after_gc}",
112                    segment.id
113                );
114
115                if space_amp_after_gc <= space_amp_target {
116                    break;
117                }
118            }
119
120            selection
121        }
122    }
123}