lsm_tree/vlog/blob_file/
mod.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5pub mod merge;
6pub mod meta;
7pub mod multi_writer;
8pub mod reader;
9pub mod scanner;
10pub mod writer;
11
12use crate::{blob_tree::FragmentationMap, vlog::BlobFileId, Checksum};
13pub use meta::Metadata;
14use std::{
15    path::{Path, PathBuf},
16    sync::{atomic::AtomicBool, Arc},
17};
18
19/// A blob file is an immutable, sorted, contiguous file that contains large key-value pairs (blobs)
20#[derive(Debug)]
21pub struct Inner {
22    /// Blob file ID
23    pub id: BlobFileId,
24
25    /// File path
26    pub path: PathBuf,
27
28    /// Statistics
29    pub meta: Metadata,
30
31    /// Whether this blob file is deleted (logically)
32    pub is_deleted: AtomicBool,
33
34    pub checksum: Checksum,
35}
36
37impl Drop for Inner {
38    fn drop(&mut self) {
39        if self.is_deleted.load(std::sync::atomic::Ordering::Acquire) {
40            log::trace!(
41                "Cleanup deleted blob file {:?} at {}",
42                self.id,
43                self.path.display(),
44            );
45
46            if let Err(e) = std::fs::remove_file(&*self.path) {
47                log::warn!(
48                    "Failed to cleanup deleted blob file {:?} at {}: {e:?}",
49                    self.id,
50                    self.path.display(),
51                );
52            }
53        }
54    }
55}
56
57/// A blob file stores large values and is part of the value log
58#[derive(Clone)]
59pub struct BlobFile(pub(crate) Arc<Inner>);
60
61impl Eq for BlobFile {}
62
63impl PartialEq for BlobFile {
64    fn eq(&self, other: &Self) -> bool {
65        self.id().eq(&other.id())
66    }
67}
68
69impl std::hash::Hash for BlobFile {
70    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
71        self.id().hash(state);
72    }
73}
74
75impl BlobFile {
76    pub(crate) fn mark_as_deleted(&self) {
77        self.0
78            .is_deleted
79            .store(true, std::sync::atomic::Ordering::Release);
80    }
81
82    /// Returns the blob file ID.
83    #[must_use]
84    pub fn id(&self) -> BlobFileId {
85        self.0.id
86    }
87
88    /// Returns the full blob file checksum.
89    #[must_use]
90    pub fn checksum(&self) -> Checksum {
91        self.0.checksum
92    }
93
94    /// Returns the blob file path.
95    #[must_use]
96    pub fn path(&self) -> &Path {
97        &self.0.path
98    }
99
100    /// Returns the number of items in the blob file.
101    #[must_use]
102    #[allow(clippy::len_without_is_empty)]
103    pub fn len(&self) -> u64 {
104        self.0.meta.item_count
105    }
106
107    /// Returns `true` if the blob file is stale (based on the given staleness threshold).
108    pub(crate) fn is_stale(&self, frag_map: &FragmentationMap, threshold: f32) -> bool {
109        frag_map.get(&self.id()).is_some_and(|x| {
110            let stale_bytes = x.bytes as f32;
111            let all_bytes = self.0.meta.total_uncompressed_bytes as f32;
112            let ratio = stale_bytes / all_bytes;
113            ratio >= threshold
114        })
115    }
116
117    /// Returns `true` if the blob file has no more incoming references, and can be safely removed from a Version.
118    pub(crate) fn is_dead(&self, frag_map: &FragmentationMap) -> bool {
119        frag_map.get(&self.id()).is_some_and(|x| {
120            let stale_bytes = x.bytes;
121            let all_bytes = self.0.meta.total_uncompressed_bytes;
122            stale_bytes == all_bytes
123        })
124    }
125}