lsm_tree/
block_cache.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::either::Either::{self, Left, Right};
6use crate::segment::block::offset::BlockOffset;
7use crate::segment::id::GlobalSegmentId;
8use crate::segment::{block_index::IndexBlock, value_block::ValueBlock};
9use quick_cache::Weighter;
10use quick_cache::{sync::Cache, Equivalent};
11use std::sync::Arc;
12
13type Item = Either<Arc<ValueBlock>, Arc<IndexBlock>>;
14
15#[derive(Eq, std::hash::Hash, PartialEq)]
16struct CacheKey(GlobalSegmentId, BlockOffset);
17
18impl Equivalent<CacheKey> for (GlobalSegmentId, BlockOffset) {
19    fn equivalent(&self, key: &CacheKey) -> bool {
20        self.0 == key.0 && self.1 == key.1
21    }
22}
23
24impl From<(GlobalSegmentId, BlockOffset)> for CacheKey {
25    fn from((gid, bid): (GlobalSegmentId, BlockOffset)) -> Self {
26        Self(gid, bid)
27    }
28}
29
/// Stateless marker type that tells the cache how much each cached block weighs.
#[derive(Clone)]
struct BlockWeighter;
32
33impl Weighter<CacheKey, Item> for BlockWeighter {
34    fn weight(&self, _: &CacheKey, block: &Item) -> u64 {
35        #[allow(clippy::cast_possible_truncation)]
36        match block {
37            Either::Left(block) => block.header.uncompressed_length.into(),
38            Either::Right(block) => block.header.uncompressed_length.into(),
39        }
40    }
41}
42
43/// Block cache, in which blocks are cached in-memory
44/// after being retrieved from disk
45///
46/// This speeds up consecutive queries to nearby data, improving
47/// read performance for hot data.
48///
49/// # Examples
50///
51/// Sharing block cache between multiple trees
52///
53/// ```
54/// # use lsm_tree::{Tree, Config, BlockCache};
55/// # use std::sync::Arc;
56/// #
57/// // Provide 40 MB of cache capacity
58/// let block_cache = Arc::new(BlockCache::with_capacity_bytes(40 * 1_000 * 1_000));
59///
60/// # let folder = tempfile::tempdir()?;
61/// let tree1 = Config::new(folder).block_cache(block_cache.clone()).open()?;
62/// # let folder = tempfile::tempdir()?;
63/// let tree2 = Config::new(folder).block_cache(block_cache.clone()).open()?;
64/// #
65/// # Ok::<(), lsm_tree::Error>(())
66/// ```
67pub struct BlockCache {
68    // NOTE: rustc_hash performed best: https://fjall-rs.github.io/post/fjall-2-1
69    /// Concurrent cache implementation
70    data: Cache<CacheKey, Item, BlockWeighter, rustc_hash::FxBuildHasher>,
71
72    /// Capacity in bytes
73    capacity: u64,
74}
75
76impl BlockCache {
77    /// Creates a new block cache with roughly `n` bytes of capacity.
78    #[must_use]
79    pub fn with_capacity_bytes(bytes: u64) -> Self {
80        use quick_cache::sync::DefaultLifecycle;
81
82        #[allow(clippy::default_trait_access)]
83        let quick_cache = Cache::with(
84            1_000_000,
85            bytes,
86            BlockWeighter,
87            Default::default(),
88            DefaultLifecycle::default(),
89        );
90
91        Self {
92            data: quick_cache,
93            capacity: bytes,
94        }
95    }
96
97    /// Returns the amount of cached bytes.
98    #[must_use]
99    pub fn size(&self) -> u64 {
100        self.data.weight()
101    }
102
103    /// Returns the cache capacity in bytes.
104    #[must_use]
105    pub fn capacity(&self) -> u64 {
106        self.capacity
107    }
108
109    /// Returns the number of cached blocks.
110    #[must_use]
111    pub fn len(&self) -> usize {
112        self.data.len()
113    }
114
115    /// Returns `true` if there are no cached blocks.
116    #[must_use]
117    pub fn is_empty(&self) -> bool {
118        self.data.is_empty()
119    }
120
121    #[doc(hidden)]
122    pub fn insert_disk_block(
123        &self,
124        segment_id: GlobalSegmentId,
125        offset: BlockOffset,
126        value: Arc<ValueBlock>,
127    ) {
128        if self.capacity > 0 {
129            self.data.insert((segment_id, offset).into(), Left(value));
130        }
131    }
132
133    #[doc(hidden)]
134    pub fn insert_index_block(
135        &self,
136        segment_id: GlobalSegmentId,
137        offset: BlockOffset,
138        value: Arc<IndexBlock>,
139    ) {
140        if self.capacity > 0 {
141            self.data.insert((segment_id, offset).into(), Right(value));
142        }
143    }
144
145    #[doc(hidden)]
146    #[must_use]
147    pub fn get_disk_block(
148        &self,
149        segment_id: GlobalSegmentId,
150        offset: BlockOffset,
151    ) -> Option<Arc<ValueBlock>> {
152        let key = (segment_id, offset);
153        let item = self.data.get(&key)?;
154        Some(item.left())
155    }
156
157    #[doc(hidden)]
158    #[must_use]
159    pub fn get_index_block(
160        &self,
161        segment_id: GlobalSegmentId,
162        offset: BlockOffset,
163    ) -> Option<Arc<IndexBlock>> {
164        let key = (segment_id, offset);
165        let item = self.data.get(&key)?;
166        Some(item.right())
167    }
168}