lsm_tree/
block_cache.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)

use crate::either::Either::{self, Left, Right};
use crate::segment::id::GlobalSegmentId;
use crate::segment::{block_index::IndexBlock, value_block::ValueBlock};
use quick_cache::Weighter;
use quick_cache::{sync::Cache, Equivalent};
use std::sync::Arc;

// Cached entry: `Left` holds a shared value block, `Right` holds a shared index block
type Item = Either<Arc<ValueBlock>, Arc<IndexBlock>>;

// Cache key: (Segment ID, Block offset)
//
// Both value blocks and index blocks are stored under this same key shape.
#[derive(PartialEq, Eq, std::hash::Hash)]
struct CacheKey(GlobalSegmentId, u64);

impl Equivalent<CacheKey> for (GlobalSegmentId, u64) {
    fn equivalent(&self, key: &CacheKey) -> bool {
        self.0 == key.0 && self.1 == key.1
    }
}

impl From<(GlobalSegmentId, u64)> for CacheKey {
    /// Wraps a `(segment ID, offset)` tuple into a [`CacheKey`],
    /// keeping the field order.
    fn from(pair: (GlobalSegmentId, u64)) -> Self {
        let (segment_id, offset) = pair;
        Self(segment_id, offset)
    }
}

// Weighter used by the block cache; its `Weighter` impl reports each
// block's `header.uncompressed_length` as the weight of the cache entry
#[derive(Clone)]
struct BlockWeighter;

impl Weighter<CacheKey, Item> for BlockWeighter {
    fn weight(&self, _: &CacheKey, block: &Item) -> u64 {
        #[allow(clippy::cast_possible_truncation)]
        match block {
            Either::Left(block) => block.header.uncompressed_length.into(),
            Either::Right(block) => block.header.uncompressed_length.into(),
        }
    }
}

/// In-memory cache for blocks that have been read from disk
///
/// Keeping recently read blocks in memory lets follow-up queries to
/// nearby data skip the disk, which improves read performance for hot data.
///
/// # Examples
///
/// Sharing block cache between multiple trees
///
/// ```
/// # use lsm_tree::{Tree, Config, BlockCache};
/// # use std::sync::Arc;
/// #
/// // Provide 40 MB of cache capacity
/// let block_cache = Arc::new(BlockCache::with_capacity_bytes(40 * 1_000 * 1_000));
///
/// # let folder = tempfile::tempdir()?;
/// let tree1 = Config::new(folder).block_cache(block_cache.clone()).open()?;
/// # let folder = tempfile::tempdir()?;
/// let tree2 = Config::new(folder).block_cache(block_cache.clone()).open()?;
/// #
/// # Ok::<(), lsm_tree::Error>(())
/// ```
pub struct BlockCache {
    /// Capacity in bytes that the cache was created with
    capacity: u64,

    /// Underlying cache, weighted by `BlockWeighter` and hashed with xxh3
    data: Cache<CacheKey, Item, BlockWeighter, xxhash_rust::xxh3::Xxh3Builder>,
}

impl BlockCache {
    /// Creates a new block cache with roughly `n` bytes of capacity.
    #[must_use]
    pub fn with_capacity_bytes(bytes: u64) -> Self {
        use quick_cache::sync::DefaultLifecycle;

        Self {
            data: Cache::with(
                1_000_000,
                bytes,
                BlockWeighter,
                xxhash_rust::xxh3::Xxh3Builder::new(),
                DefaultLifecycle::default(),
            ),
            capacity: bytes,
        }
    }

    /// Returns the amount of cached bytes.
    #[must_use]
    pub fn size(&self) -> u64 {
        self.data.weight()
    }

    /// Returns the cache capacity in bytes.
    #[must_use]
    pub fn capacity(&self) -> u64 {
        self.capacity
    }

    /// Returns the number of cached blocks.
    #[must_use]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` if there are no cached blocks.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    #[doc(hidden)]
    pub fn insert_disk_block(
        &self,
        segment_id: GlobalSegmentId,
        offset: u64,
        value: Arc<ValueBlock>,
    ) {
        if self.capacity > 0 {
            self.data.insert((segment_id, offset).into(), Left(value));
        }
    }

    #[doc(hidden)]
    pub fn insert_index_block(
        &self,
        segment_id: GlobalSegmentId,
        offset: u64,
        value: Arc<IndexBlock>,
    ) {
        if self.capacity > 0 {
            self.data.insert((segment_id, offset).into(), Right(value));
        }
    }

    #[doc(hidden)]
    #[must_use]
    pub fn get_disk_block(
        &self,
        segment_id: GlobalSegmentId,
        offset: u64,
    ) -> Option<Arc<ValueBlock>> {
        let key = (segment_id, offset);
        let item = self.data.get(&key)?;
        Some(item.left())
    }

    #[doc(hidden)]
    #[must_use]
    pub fn get_index_block(
        &self,
        segment_id: GlobalSegmentId,
        offset: u64,
    ) -> Option<Arc<IndexBlock>> {
        let key = (segment_id, offset);
        let item = self.data.get(&key)?;
        Some(item.right())
    }
}