lsm_tree/
cache.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::segment::block::Header;
6use crate::segment::{Block, BlockOffset};
7use crate::{GlobalSegmentId, UserValue};
8use quick_cache::Weighter;
9use quick_cache::{sync::Cache as QuickCache, Equivalent};
10
11const TAG_BLOCK: u8 = 0;
12const TAG_BLOB: u8 = 1;
13
14#[derive(Clone)]
15enum Item {
16    Block(Block),
17    Blob(UserValue),
18}
19
20#[derive(Eq, std::hash::Hash, PartialEq)]
21struct CacheKey(u8, u64, u64, u64);
22
23impl Equivalent<CacheKey> for (u8, u64, u64, u64) {
24    fn equivalent(&self, key: &CacheKey) -> bool {
25        self.0 == key.0 && self.1 == key.1 && self.2 == key.2 && self.3 == key.3
26    }
27}
28
29impl From<(u8, u64, u64, u64)> for CacheKey {
30    fn from((tag, root_id, segment_id, offset): (u8, u64, u64, u64)) -> Self {
31        Self(tag, root_id, segment_id, offset)
32    }
33}
34
35#[derive(Clone)]
36struct BlockWeighter;
37
38impl Weighter<CacheKey, Item> for BlockWeighter {
39    fn weight(&self, _: &CacheKey, item: &Item) -> u64 {
40        use Item::{Blob, Block};
41
42        match item {
43            Block(b) => (Header::serialized_len() as u64) + u64::from(b.header.uncompressed_length),
44            Blob(b) => b.len() as u64,
45        }
46    }
47}
48
49/// Cache, in which blocks or blobs are cached in-memory
50/// after being retrieved from disk
51///
52/// This speeds up consecutive queries to nearby data, improving
53/// read performance for hot data.
54///
55/// # Examples
56///
57/// Sharing cache between multiple trees
58///
59/// ```
60/// # use lsm_tree::{Tree, Config, Cache};
61/// # use std::sync::Arc;
62/// #
63/// // Provide 64 MB of cache capacity
64/// let cache = Arc::new(Cache::with_capacity_bytes(64 * 1_000 * 1_000));
65///
66/// # let folder = tempfile::tempdir()?;
67/// let tree1 = Config::new(folder).use_cache(cache.clone()).open()?;
68/// # let folder = tempfile::tempdir()?;
69/// let tree2 = Config::new(folder).use_cache(cache.clone()).open()?;
70/// #
71/// # Ok::<(), lsm_tree::Error>(())
72/// ```
73pub struct Cache {
74    // NOTE: rustc_hash performed best: https://fjall-rs.github.io/post/fjall-2-1
75    /// Concurrent cache implementation
76    data: QuickCache<CacheKey, Item, BlockWeighter, rustc_hash::FxBuildHasher>,
77
78    /// Capacity in bytes
79    capacity: u64,
80}
81
82impl Cache {
83    /// Creates a new block cache with roughly `n` bytes of capacity.
84    #[must_use]
85    pub fn with_capacity_bytes(bytes: u64) -> Self {
86        use quick_cache::sync::DefaultLifecycle;
87
88        // NOTE: Nothing we can do if it fails
89        #[allow(clippy::expect_used)]
90        let opts = quick_cache::OptionsBuilder::new()
91            .weight_capacity(bytes)
92            .hot_allocation(0.9)
93            .estimated_items_capacity(1_000_000)
94            .build()
95            .expect("cache options should be valid");
96
97        #[allow(clippy::default_trait_access)]
98        let quick_cache = QuickCache::with_options(
99            opts,
100            BlockWeighter,
101            Default::default(),
102            DefaultLifecycle::default(),
103        );
104
105        Self {
106            data: quick_cache,
107            capacity: bytes,
108        }
109    }
110
111    /// Returns the amount of cached bytes.
112    #[must_use]
113    pub fn size(&self) -> u64 {
114        self.data.weight()
115    }
116
117    /// Returns the cache capacity in bytes.
118    #[must_use]
119    pub fn capacity(&self) -> u64 {
120        self.capacity
121    }
122
123    /// Returns the number of cached blocks.
124    #[must_use]
125    pub fn len(&self) -> usize {
126        self.data.len()
127    }
128
129    /// Returns `true` if there are no cached blocks.
130    #[must_use]
131    pub fn is_empty(&self) -> bool {
132        self.data.is_empty()
133    }
134
135    #[doc(hidden)]
136    #[must_use]
137    pub fn get_block(&self, id: GlobalSegmentId, offset: BlockOffset) -> Option<Block> {
138        let key: CacheKey = (TAG_BLOCK, id.tree_id(), id.segment_id(), *offset).into();
139
140        Some(match self.data.get(&key)? {
141            Item::Block(block) => block,
142            Item::Blob(_) => unreachable!("invalid cache item"),
143        })
144    }
145
146    #[doc(hidden)]
147    pub fn insert_block(&self, id: GlobalSegmentId, offset: BlockOffset, block: Block) {
148        self.data.insert(
149            (TAG_BLOCK, id.tree_id(), id.segment_id(), *offset).into(),
150            Item::Block(block),
151        );
152    }
153
154    #[doc(hidden)]
155    pub fn insert_blob(
156        &self,
157        vlog_id: crate::TreeId,
158        vhandle: &crate::vlog::ValueHandle,
159        value: UserValue,
160    ) {
161        self.data.insert(
162            (TAG_BLOB, vlog_id, vhandle.blob_file_id, vhandle.offset).into(),
163            Item::Blob(value),
164        );
165    }
166
167    #[doc(hidden)]
168    #[must_use]
169    pub fn get_blob(
170        &self,
171        vlog_id: crate::TreeId,
172        vhandle: &crate::vlog::ValueHandle,
173    ) -> Option<UserValue> {
174        let key: CacheKey = (TAG_BLOB, vlog_id, vhandle.blob_file_id, vhandle.offset).into();
175
176        Some(match self.data.get(&key)? {
177            Item::Blob(blob) => blob,
178            Item::Block(_) => unreachable!("invalid cache item"),
179        })
180    }
181}