lsm_tree/vlog/blob_file/
mod.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5pub mod gc_stats;
6pub mod merge;
7pub mod meta;
8pub mod multi_writer;
9pub mod reader;
10pub mod trailer;
11pub mod writer;
12
13use crate::vlog::BlobFileId;
14pub use gc_stats::GcStats;
15pub use meta::Metadata;
16use std::{path::PathBuf, sync::Arc};
17
18/// A blob file is an immutable, sorted, contiguous file that contains large key-value pairs (blobs)
19#[derive(Debug)]
20pub(crate) struct Inner {
21    /// Blob file ID
22    pub id: BlobFileId,
23
24    /// File path
25    pub path: PathBuf,
26
27    /// Statistics
28    pub meta: Metadata,
29
30    /// Runtime stats for garbage collection
31    pub gc_stats: GcStats,
32    // TODO: is_deleted, on Drop, like SST segments
33}
34
35/// A blob file stores large values and is part of the value log
36#[derive(Clone)]
37pub struct BlobFile(pub(crate) Arc<Inner>);
38
39impl Eq for BlobFile {}
40
41impl PartialEq for BlobFile {
42    fn eq(&self, other: &Self) -> bool {
43        self.id().eq(&other.id())
44    }
45}
46
47impl std::hash::Hash for BlobFile {
48    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
49        self.id().hash(state);
50    }
51}
52
53impl BlobFile {
54    /// Returns the blob file ID.
55    #[must_use]
56    pub fn id(&self) -> BlobFileId {
57        self.0.id
58    }
59
60    /// Returns a scanner that can iterate through the blob file.
61    ///
62    /// # Errors
63    ///
64    /// Will return `Err` if an IO error occurs.
65    pub fn scan(&self) -> crate::Result<reader::Reader> {
66        reader::Reader::new(&self.0.path, self.id())
67    }
68
69    /// Returns the number of items in the blob file.
70    #[must_use]
71    #[allow(clippy::len_without_is_empty)]
72    pub fn len(&self) -> u64 {
73        self.0.meta.item_count
74    }
75
76    /// Marks the blob file as fully stale.
77    pub(crate) fn mark_as_stale(&self) {
78        self.0.gc_stats.set_stale_items(self.0.meta.item_count);
79
80        self.0
81            .gc_stats
82            .set_stale_bytes(self.0.meta.total_uncompressed_bytes);
83    }
84
85    /// Returns `true` if the blob file is fully stale.
86    #[must_use]
87    pub fn is_stale(&self) -> bool {
88        self.0.gc_stats.stale_items() == self.0.meta.item_count
89    }
90
91    /// Returns the percent of dead items in the blob file.
92    // NOTE: Precision is not important here
93    #[allow(clippy::cast_precision_loss)]
94    #[must_use]
95    pub fn stale_ratio(&self) -> f32 {
96        let dead = self.0.gc_stats.stale_items() as f32;
97        if dead == 0.0 {
98            return 0.0;
99        }
100
101        dead / self.0.meta.item_count as f32
102    }
103}