mountpoint_s3_fs/
data_cache.rs

1//! Traits and types for data caching.
2//!
3//! The data cache aims to reduce repeated fetches of S3 object content,
4//! reducing both the number of requests and the latency of reads.
5//! Ultimately, this means reduced cost in terms of S3 billing as well as compute time.
6
7mod cache_directory;
8mod disk_data_cache;
9mod express_data_cache;
10mod in_memory_data_cache;
11mod multilevel_cache;
12
13use async_trait::async_trait;
14use thiserror::Error;
15
16pub use crate::checksums::ChecksummedBytes;
17pub use crate::data_cache::cache_directory::ManagedCacheDir;
18pub use crate::data_cache::disk_data_cache::{CacheLimit, DiskDataCache, DiskDataCacheConfig};
19pub use crate::data_cache::express_data_cache::{ExpressDataCache, ExpressDataCacheConfig, build_prefix, get_s3_key};
20pub use crate::data_cache::in_memory_data_cache::InMemoryDataCache;
21pub use crate::data_cache::multilevel_cache::MultilevelDataCache;
22
23use crate::object::ObjectId;
24
25/// Indexes blocks within a given object.
///
/// Plain alias for `u64`. It is the type of the `block_idx` parameter of
/// [DataCache::get_block] and [DataCache::put_block].
26pub type BlockIndex = u64;
27
28/// Errors returned by operations on a [DataCache].
///
/// The message of each variant comes from its `#[error(...)]` attribute, provided by the
/// `thiserror::Error` derive. Which operation produces which variant is decided by the
/// individual cache implementations, not by this enum.
29#[derive(Debug, Error)]
30pub enum DataCacheError {
    /// Reading from or writing to the cache failed.
    ///
    /// Wraps the underlying [anyhow::Error], which is interpolated into the message and also
    /// marked as this error's `source`.
31    #[error("IO error when reading or writing from cache: {0}")]
32    IoFailure(#[source] anyhow::Error),
    /// A block header was not valid. The `String` payload is interpolated into the message.
33    #[error("Block header was not valid: {0}")]
34    InvalidBlockHeader(String),
    /// A block checksum was not valid. Carries no further detail.
35    #[error("Block checksum was not valid")]
36    InvalidBlockChecksum,
    /// A block's content was not valid or could not be read. Carries no further detail.
37    #[error("Block content was not valid/readable")]
38    InvalidBlockContent,
    /// A block offset did not match the block index.
    ///
    /// NOTE(review): presumably refers to the `block_offset` / `block_idx` pair passed to
    /// [DataCache] methods; confirm against the implementations.
39    #[error("Block offset does not match block index")]
40    InvalidBlockOffset,
    /// Evicting cache content failed. Carries no detail about the cause.
41    #[error("Error while trying to evict cache content")]
42    EvictionFailure,
43}
44
/// Result alias whose error type is fixed to [DataCacheError].
///
/// Used as the return type of the fallible [DataCache] methods.
45pub type DataCacheResult<Value> = Result<Value, DataCacheError>;
46
47/// Data cache for fixed-size checksummed buffers.
48///
/// A block is addressed by an [ObjectId] together with a [BlockIndex], and its content is
/// exchanged as [ChecksummedBytes]. The `async` methods are enabled by the `#[async_trait]`
/// attribute on the trait; [DataCache::block_size] is the only synchronous method.
///
49/// TODO: Deletion and eviction of cache entries.
50/// TODO: Some version information (ETag) independent from [ObjectId] to allow smarter eviction?
51#[async_trait]
52pub trait DataCache {
53    /// Get block of data from the cache for the given [ObjectId] and [BlockIndex], if available.
54    ///
55    /// Operation may fail due to errors, or return [None] if the block was not available in the cache.
    ///
    /// * `cache_key` - the object the block belongs to, taken by reference.
    /// * `block_idx` - index of the requested block within that object.
    /// * `block_offset` - presumably the byte offset of the block within the object, expected to
    ///   be consistent with `block_idx` (cf. [DataCacheError::InvalidBlockOffset]).
    ///   TODO confirm against the implementations.
    /// * `object_size` - presumably the total size of the object. TODO confirm how it is used.
    ///
    /// Returns `Ok(Some(_))` with the block content, `Ok(None)` when the block is not cached,
    /// or a [DataCacheError].
56    async fn get_block(
57        &self,
58        cache_key: &ObjectId,
59        block_idx: BlockIndex,
60        block_offset: u64,
61        object_size: usize,
62    ) -> DataCacheResult<Option<ChecksummedBytes>>;
63
64    /// Put block of data to the cache for the given [ObjectId] and [BlockIndex].
    ///
    /// Unlike [DataCache::get_block], `cache_key` is taken by value here. `bytes` is the block
    /// content to store. `block_idx`, `block_offset` and `object_size` mirror the parameters of
    /// [DataCache::get_block]; the same caveats about their exact meaning apply (TODO confirm).
    ///
    /// Returns `Ok(())` on success or a [DataCacheError].
65    async fn put_block(
66        &self,
67        cache_key: ObjectId,
68        block_idx: BlockIndex,
69        block_offset: u64,
70        bytes: ChecksummedBytes,
71        object_size: usize,
72    ) -> DataCacheResult<()>;
73
74    /// Returns the block size for the data cache.
    ///
    /// NOTE(review): the unit is presumably bytes; confirm against the implementations.
75    fn block_size(&self) -> u64;
76}
77
78/// Configuration for the data cache.
79///
80/// Can be configured to enable a local disk cache, a shared cache
81/// in S3 Express One Zone, or both.
///
/// The derived [Default] leaves both fields as `None`.
/// NOTE(review): presumably a `None` field means that cache is not enabled; confirm with the
/// code that consumes this configuration.
82#[derive(Debug, Default)]
83pub struct DataCacheConfig {
    /// Configuration of the local disk cache, if one is configured.
84    pub disk_cache_config: Option<DiskDataCacheConfig>,
    /// Configuration of the shared cache in S3 Express One Zone, if one is configured.
85    pub express_cache_config: Option<ExpressDataCacheConfig>,
86}