lsm_tree/vlog/blob_file/
mod.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5pub mod merge;
6pub mod meta;
7pub mod multi_writer;
8pub mod reader;
9pub mod scanner;
10pub mod writer;
11
12use crate::{blob_tree::FragmentationMap, vlog::BlobFileId};
13pub use meta::Metadata;
14use std::{
15    path::PathBuf,
16    sync::{atomic::AtomicBool, Arc},
17};
18
19/// A blob file is an immutable, sorted, contiguous file that contains large key-value pairs (blobs)
20#[derive(Debug)]
21pub struct Inner {
22    /// Blob file ID
23    pub id: BlobFileId,
24
25    /// File path
26    pub path: PathBuf,
27
28    /// Statistics
29    pub meta: Metadata,
30
31    /// Whether this blob file is deleted (logically)
32    pub is_deleted: AtomicBool,
33}
34
35impl Drop for Inner {
36    fn drop(&mut self) {
37        if self.is_deleted.load(std::sync::atomic::Ordering::Acquire) {
38            log::trace!(
39                "Cleanup deleted blob file {:?} at {}",
40                self.id,
41                self.path.display(),
42            );
43
44            if let Err(e) = std::fs::remove_file(&*self.path) {
45                log::warn!(
46                    "Failed to cleanup deleted blob file {:?} at {}: {e:?}",
47                    self.id,
48                    self.path.display(),
49                );
50            }
51        }
52    }
53}
54
55/// A blob file stores large values and is part of the value log
56#[derive(Clone)]
57pub struct BlobFile(pub(crate) Arc<Inner>);
58
59impl Eq for BlobFile {}
60
61impl PartialEq for BlobFile {
62    fn eq(&self, other: &Self) -> bool {
63        self.id().eq(&other.id())
64    }
65}
66
67impl std::hash::Hash for BlobFile {
68    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
69        self.id().hash(state);
70    }
71}
72
73impl BlobFile {
74    pub(crate) fn mark_as_deleted(&self) {
75        self.0
76            .is_deleted
77            .store(true, std::sync::atomic::Ordering::Release);
78    }
79
80    /// Returns the blob file ID.
81    #[must_use]
82    pub fn id(&self) -> BlobFileId {
83        self.0.id
84    }
85
86    /// Returns the number of items in the blob file.
87    #[must_use]
88    #[allow(clippy::len_without_is_empty)]
89    pub fn len(&self) -> u64 {
90        self.0.meta.item_count
91    }
92
93    /// Returns `true` if the blob file is stale (based on the given staleness threshold).
94    pub(crate) fn is_stale(&self, frag_map: &FragmentationMap, threshold: f32) -> bool {
95        frag_map.get(&self.id()).is_some_and(|x| {
96            let stale_bytes = x.bytes as f32;
97            let all_bytes = self.0.meta.total_uncompressed_bytes as f32;
98            let ratio = stale_bytes / all_bytes;
99            ratio >= threshold
100        })
101    }
102
103    /// Returns `true` if the blob file has no more incoming references, and can be safely removed from a Version.
104    pub(crate) fn is_dead(&self, frag_map: &FragmentationMap) -> bool {
105        frag_map.get(&self.id()).is_some_and(|x| {
106            let stale_bytes = x.bytes;
107            let all_bytes = self.0.meta.total_uncompressed_bytes;
108            stale_bytes == all_bytes
109        })
110    }
111}