lsm_tree/
cache.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::table::block::Header;
6use crate::table::{Block, BlockOffset};
7use crate::{GlobalTableId, UserValue};
8use quick_cache::Weighter;
9use quick_cache::{sync::Cache as QuickCache, Equivalent};
10
11const TAG_BLOCK: u8 = 0;
12const TAG_BLOB: u8 = 1;
13
14#[derive(Clone)]
15enum Item {
16    Block(Block),
17    Blob(UserValue),
18}
19
20#[derive(Eq, std::hash::Hash, PartialEq)]
21struct CacheKey(u8, u64, u64, u64);
22
23impl Equivalent<CacheKey> for (u8, u64, u64, u64) {
24    fn equivalent(&self, key: &CacheKey) -> bool {
25        self.0 == key.0 && self.1 == key.1 && self.2 == key.2 && self.3 == key.3
26    }
27}
28
29impl From<(u8, u64, u64, u64)> for CacheKey {
30    fn from((tag, root_id, table_id, offset): (u8, u64, u64, u64)) -> Self {
31        Self(tag, root_id, table_id, offset)
32    }
33}
34
35#[derive(Clone)]
36struct BlockWeighter;
37
38impl Weighter<CacheKey, Item> for BlockWeighter {
39    fn weight(&self, _: &CacheKey, item: &Item) -> u64 {
40        use Item::{Blob, Block};
41
42        match item {
43            Block(b) => (Header::serialized_len() as u64) + u64::from(b.header.uncompressed_length),
44            Blob(b) => b.len() as u64,
45        }
46    }
47}
48
49/// Cache, in which blocks or blobs are cached in-memory
50/// after being retrieved from disk
51///
52/// This speeds up consecutive queries to nearby data, improving
53/// read performance for hot data.
54///
55/// # Examples
56///
57/// Sharing cache between multiple trees
58///
59/// ```
60/// # use lsm_tree::{Tree, Config, Cache};
61/// # use std::sync::Arc;
62/// #
63/// // Provide 64 MB of cache capacity
64/// let cache = Arc::new(Cache::with_capacity_bytes(64 * 1_000 * 1_000));
65///
66/// # let folder = tempfile::tempdir()?;
67/// let tree1 = Config::new(folder).use_cache(cache.clone()).open()?;
68/// # let folder = tempfile::tempdir()?;
69/// let tree2 = Config::new(folder).use_cache(cache.clone()).open()?;
70/// #
71/// # Ok::<(), lsm_tree::Error>(())
72/// ```
73pub struct Cache {
74    // NOTE: rustc_hash performed best: https://fjall-rs.github.io/post/fjall-2-1
75    /// Concurrent cache implementation
76    data: QuickCache<CacheKey, Item, BlockWeighter, rustc_hash::FxBuildHasher>,
77
78    /// Capacity in bytes
79    capacity: u64,
80}
81
82impl Cache {
83    /// Creates a new block cache with roughly `n` bytes of capacity.
84    #[must_use]
85    pub fn with_capacity_bytes(bytes: u64) -> Self {
86        use quick_cache::sync::DefaultLifecycle;
87
88        #[expect(clippy::expect_used, reason = "nothing we can do if it fails")]
89        let opts = quick_cache::OptionsBuilder::new()
90            .weight_capacity(bytes)
91            .hot_allocation(0.9)
92            .estimated_items_capacity(1_000)
93            .build()
94            .expect("cache options should be valid");
95
96        let quick_cache = QuickCache::with_options(
97            opts,
98            BlockWeighter,
99            rustc_hash::FxBuildHasher,
100            DefaultLifecycle::default(),
101        );
102
103        Self {
104            data: quick_cache,
105            capacity: bytes,
106        }
107    }
108
109    /// Returns the amount of cached bytes.
110    #[must_use]
111    pub fn size(&self) -> u64 {
112        self.data.weight()
113    }
114
115    /// Returns the cache capacity in bytes.
116    #[must_use]
117    pub fn capacity(&self) -> u64 {
118        self.capacity
119    }
120
121    /// Returns the number of cached blocks.
122    #[must_use]
123    pub fn len(&self) -> usize {
124        self.data.len()
125    }
126
127    /// Returns `true` if there are no cached blocks.
128    #[must_use]
129    pub fn is_empty(&self) -> bool {
130        self.data.is_empty()
131    }
132
133    #[doc(hidden)]
134    #[must_use]
135    pub fn get_block(&self, id: GlobalTableId, offset: BlockOffset) -> Option<Block> {
136        let key: CacheKey = (TAG_BLOCK, id.tree_id(), id.table_id(), *offset).into();
137
138        Some(match self.data.get(&key)? {
139            Item::Block(block) => block,
140            Item::Blob(_) => unreachable!("invalid cache item"),
141        })
142    }
143
144    #[doc(hidden)]
145    pub fn insert_block(&self, id: GlobalTableId, offset: BlockOffset, block: Block) {
146        self.data.insert(
147            (TAG_BLOCK, id.tree_id(), id.table_id(), *offset).into(),
148            Item::Block(block),
149        );
150    }
151
152    #[doc(hidden)]
153    pub fn insert_blob(
154        &self,
155        vlog_id: crate::TreeId,
156        vhandle: &crate::vlog::ValueHandle,
157        value: UserValue,
158    ) {
159        self.data.insert(
160            (TAG_BLOB, vlog_id, vhandle.blob_file_id, vhandle.offset).into(),
161            Item::Blob(value),
162        );
163    }
164
165    #[doc(hidden)]
166    #[must_use]
167    pub fn get_blob(
168        &self,
169        vlog_id: crate::TreeId,
170        vhandle: &crate::vlog::ValueHandle,
171    ) -> Option<UserValue> {
172        let key: CacheKey = (TAG_BLOB, vlog_id, vhandle.blob_file_id, vhandle.offset).into();
173
174        Some(match self.data.get(&key)? {
175            Item::Blob(blob) => blob,
176            Item::Block(_) => unreachable!("invalid cache item"),
177        })
178    }
179}