lsm_tree/
cache.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::segment::block::offset::BlockOffset;
6use crate::segment::id::GlobalSegmentId;
7use crate::segment::{block_index::IndexBlock, value_block::ValueBlock};
8use crate::UserValue;
9use quick_cache::Weighter;
10use quick_cache::{sync::Cache as QuickCache, Equivalent};
11use std::sync::Arc;
12
13const TAG_BLOCK: u8 = 0;
14const TAG_BLOB: u8 = 1;
15
16#[derive(Clone)]
17enum Item {
18    DataBlock(Arc<ValueBlock>),
19    IndexBlock(Arc<IndexBlock>),
20    Blob(UserValue),
21}
22
/// Composite key identifying a cached item.
///
/// The fields are, in order: tag, root ID, segment ID and offset.
#[derive(PartialEq, Eq, std::hash::Hash)]
struct CacheKey(u8, u64, u64, u64);
25
26impl Equivalent<CacheKey> for (u8, u64, u64, u64) {
27    fn equivalent(&self, key: &CacheKey) -> bool {
28        self.0 == key.0 && self.1 == key.1 && self.2 == key.2 && self.3 == key.3
29    }
30}
31
32impl From<(u8, u64, u64, u64)> for CacheKey {
33    fn from((tag, root_id, segment_id, offset): (u8, u64, u64, u64)) -> Self {
34        Self(tag, root_id, segment_id, offset)
35    }
36}
37
/// Weighter used by the cache to determine how much capacity
/// each cached `Item` takes up.
#[derive(Clone)]
struct BlockWeighter;
40
41impl Weighter<CacheKey, Item> for BlockWeighter {
42    fn weight(&self, _: &CacheKey, block: &Item) -> u64 {
43        #[allow(clippy::cast_possible_truncation)]
44        match block {
45            Item::DataBlock(block) => block.header.uncompressed_length.into(),
46            Item::IndexBlock(block) => block.header.uncompressed_length.into(),
47            Item::Blob(blob) => blob.len() as u64,
48        }
49    }
50}
51
52/// Cache, in which blocks or blobs are cached in-memory
53/// after being retrieved from disk
54///
55/// This speeds up consecutive queries to nearby data, improving
56/// read performance for hot data.
57///
58/// # Examples
59///
60/// Sharing cache between multiple trees
61///
62/// ```
63/// # use lsm_tree::{Tree, Config, Cache};
64/// # use std::sync::Arc;
65/// #
66/// // Provide 40 MB of cache capacity
67/// let cache = Arc::new(Cache::with_capacity_bytes(40 * 1_000 * 1_000));
68///
69/// # let folder = tempfile::tempdir()?;
70/// let tree1 = Config::new(folder).use_cache(cache.clone()).open()?;
71/// # let folder = tempfile::tempdir()?;
72/// let tree2 = Config::new(folder).use_cache(cache.clone()).open()?;
73/// #
74/// # Ok::<(), lsm_tree::Error>(())
75/// ```
76pub struct Cache {
77    // NOTE: rustc_hash performed best: https://fjall-rs.github.io/post/fjall-2-1
78    /// Concurrent cache implementation
79    data: QuickCache<CacheKey, Item, BlockWeighter, rustc_hash::FxBuildHasher>,
80
81    /// Capacity in bytes
82    capacity: u64,
83}
84
85impl Cache {
86    /// Creates a new block cache with roughly `n` bytes of capacity.
87    #[must_use]
88    pub fn with_capacity_bytes(bytes: u64) -> Self {
89        use quick_cache::sync::DefaultLifecycle;
90
91        #[allow(clippy::default_trait_access)]
92        let quick_cache = QuickCache::with(
93            1_000_000,
94            bytes,
95            BlockWeighter,
96            Default::default(),
97            DefaultLifecycle::default(),
98        );
99
100        Self {
101            data: quick_cache,
102            capacity: bytes,
103        }
104    }
105
106    /// Returns the amount of cached bytes.
107    #[must_use]
108    pub fn size(&self) -> u64 {
109        self.data.weight()
110    }
111
112    /// Returns the cache capacity in bytes.
113    #[must_use]
114    pub fn capacity(&self) -> u64 {
115        self.capacity
116    }
117
118    /// Returns the number of cached blocks.
119    #[must_use]
120    pub fn len(&self) -> usize {
121        self.data.len()
122    }
123
124    /// Returns `true` if there are no cached blocks.
125    #[must_use]
126    pub fn is_empty(&self) -> bool {
127        self.data.is_empty()
128    }
129
130    #[doc(hidden)]
131    pub fn insert_data_block(
132        &self,
133        id: GlobalSegmentId,
134        offset: BlockOffset,
135        value: Arc<ValueBlock>,
136    ) {
137        if self.capacity > 0 {
138            self.data.insert(
139                (TAG_BLOCK, id.tree_id(), id.segment_id(), *offset).into(),
140                Item::DataBlock(value),
141            );
142        }
143    }
144
145    #[doc(hidden)]
146    pub fn insert_index_block(
147        &self,
148        id: GlobalSegmentId,
149        offset: BlockOffset,
150        value: Arc<IndexBlock>,
151    ) {
152        if self.capacity > 0 {
153            self.data.insert(
154                (TAG_BLOCK, id.tree_id(), id.segment_id(), *offset).into(),
155                Item::IndexBlock(value),
156            );
157        }
158    }
159
160    #[doc(hidden)]
161    #[must_use]
162    pub fn get_data_block(
163        &self,
164        id: GlobalSegmentId,
165        offset: BlockOffset,
166    ) -> Option<Arc<ValueBlock>> {
167        let key: CacheKey = (TAG_BLOCK, id.tree_id(), id.segment_id(), *offset).into();
168
169        if let Item::DataBlock(block) = self.data.get(&key)? {
170            Some(block)
171        } else {
172            log::warn!("cache item type was unexpected - this is a bug");
173            None
174        }
175    }
176
177    #[doc(hidden)]
178    #[must_use]
179    pub fn get_index_block(
180        &self,
181        id: GlobalSegmentId,
182        offset: BlockOffset,
183    ) -> Option<Arc<IndexBlock>> {
184        let key: CacheKey = (TAG_BLOCK, id.tree_id(), id.segment_id(), *offset).into();
185
186        if let Item::IndexBlock(block) = self.data.get(&key)? {
187            Some(block)
188        } else {
189            log::warn!("cache item type was unexpected - this is a bug");
190            None
191        }
192    }
193
194    #[doc(hidden)]
195    pub fn insert_blob(
196        &self,
197        vlog_id: value_log::ValueLogId,
198        vhandle: &value_log::ValueHandle,
199        value: UserValue,
200    ) {
201        if self.capacity > 0 {
202            self.data.insert(
203                (TAG_BLOB, vlog_id, vhandle.segment_id, vhandle.offset).into(),
204                Item::Blob(value),
205            );
206        }
207    }
208
209    #[doc(hidden)]
210    #[must_use]
211    pub fn get_blob(
212        &self,
213        vlog_id: value_log::ValueLogId,
214        vhandle: &value_log::ValueHandle,
215    ) -> Option<UserValue> {
216        let key: CacheKey = (TAG_BLOB, vlog_id, vhandle.segment_id, vhandle.offset).into();
217
218        if let Item::Blob(blob) = self.data.get(&key)? {
219            Some(blob)
220        } else {
221            log::warn!("cache item type was unexpected - this is a bug");
222            None
223        }
224    }
225}