Skip to main content

xet_client/chunk_cache/
mod.rs

1mod cache_manager;
2mod disk;
3pub mod error;
4
5use std::path::PathBuf;
6
7use async_trait::async_trait;
8pub use cache_manager::get_cache;
9pub use disk::DiskCache;
10pub use disk::test_utils::*;
11use error::ChunkCacheError;
12#[cfg(test)]
13use mockall::automock;
14use xet_runtime::core::xet_config;
15
16use crate::cas_types::{ChunkRange, Key};
17
18/// Return dto for cache gets
19/// offsets has 1 more than then number of chunks in the specified range
20/// suppose the range is for chunks [2, 5) then offsets may look like:
21/// [0, 2000, 4000, 6000] where chunk 2 is made of bytes [0, 2000)
22/// chunk 3 [2000, 4000) and chunk 4 is [4000, 6000).
23/// It is guaranteed that the first number in offsets is 0 and the last number is data.len()
24#[derive(Debug)]
25pub struct CacheRange {
26    pub offsets: Vec<u32>,
27    pub data: Vec<u8>,
28    pub range: ChunkRange,
29}
30
31/// ChunkCache is a trait for storing and fetching Xorb ranges.
32/// implementors are expected to return bytes for a key and a given chunk range
33/// (no compression or further deserialization should be required)
34/// Range inputs use chunk indices in an end exclusive way i.e. [start, end)
35///
36/// implementors are allowed to evict data, a get after a put is not required to
37/// be a cache hit.
38#[cfg_attr(test, automock)]
39#[async_trait]
40pub trait ChunkCache: Sync + Send {
41    /// get should return an Ok() variant if significant error occurred, check the error
42    /// variant for issues with IO or parsing contents etc.
43    ///
44    /// if get returns an Ok(None) then there was no error, but there was a cache miss
45    /// otherwise returns an Ok(Some(data)) where data matches exactly the bytes for
46    /// the requested key and the requested chunk index range for that key
47    ///
48    /// Given implementors are expected to be able to evict members there's no guarantee
49    /// that a previously put range will be a cache hit
50    ///
51    /// key is required to be a valid XORB key
52    /// range is intended to be an index range within the xorb with constraint
53    ///     0 <= range.start < range.end <= num_chunks_in_xorb(key)
54    async fn get(&self, key: &Key, range: &ChunkRange) -> Result<Option<CacheRange>, ChunkCacheError>;
55
56    /// put should return Ok(()) if the put succeeded with no error, check the error
57    /// variant for issues with validating the input, cache state, IO, etc.
58    ///
59    /// put expects that chunk_byte_indices.len() is range.end - range.start + 1
60    /// with 1 entry for each start byte index for [range.start, range.end]
61    /// the first entry must be 0 (start of first chunk in the data)
62    /// the last entry must be data.len() i.e. the end of data, start of chunk past end
63    ///
64    /// key is required to be a valid XORB key
65    /// range is intended to be an index range within the xorb with constraint
66    ///     0 <= range.start < range.end <= num_chunks_in_xorb(key)
67    async fn put(
68        &self,
69        key: &Key,
70        range: &ChunkRange,
71        chunk_byte_indices: &[u32],
72        data: &[u8],
73    ) -> Result<(), ChunkCacheError>;
74}
75
/// Settings describing where a chunk cache lives and how large it may be.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Directory associated with the cache.
    pub cache_directory: PathBuf,
    /// Size setting for the cache; presumably in bytes, since the `Default` impl
    /// reads it from `chunk_cache.size_bytes` (confirm against the consumers).
    pub cache_size: u64,
}
81
82impl Default for CacheConfig {
83    fn default() -> Self {
84        CacheConfig {
85            cache_directory: PathBuf::from("/tmp"),
86            cache_size: xet_config().chunk_cache.size_bytes,
87        }
88    }
89}