Skip to main content

xet_runtime/config/groups/
shard.rs

1use crate::utils::ByteSize;
2
3crate::config_group!({
4    // Shard settings: shard sizing, shard-cache limit and location, and the in-memory chunk index cap.
5    /// The target shard size in bytes.
6    ///
7    /// The default value is 67108864 (64 MiB).
8    ///
9    /// Use the environment variable `HF_XET_SHARD_TARGET_SIZE` to set this value.
10    ref target_size: u64 = 64 * 1024 * 1024;
11
12    /// Maximum shard size; small shards are aggregated until the result is at most this size.
13    ///
14    /// The default value is 67108864 (64 * 1024 * 1024, the same as `target_size`).
15    ///
16    /// Use the environment variable `HF_XET_SHARD_MAX_TARGET_SIZE` to set this value.
17    ref max_target_size: u64 = 64 * 1024 * 1024;
18
19    /// The (soft) maximum size in bytes of the shard cache. Default is 16 GB.
20    ///
21    /// As a rough calculation, a cache of size X will allow for dedup against data
22    /// of size 1000 * X. With the default, a 16 TB repo can be deduped effectively.
23    ///
24    /// Note that the cache is pruned to below this value at the beginning of a session,
25    /// but new shards added during a single session may push it past this limit.
26    ///
27    /// The default value is 16gb.
28    ///
29    /// Use the environment variable `HF_XET_SHARD_CACHE_SIZE_LIMIT` to set this value.
30    ref cache_size_limit : ByteSize = ByteSize::from("16gb");
31
32    /// The maximum size of the chunk index table that's stored in memory. Once this size
33    /// is reached, no new chunks are loaded for deduplication.
34    ///
35    /// The default value is 67108864 (64 * 1024 * 1024).
36    ///
37    /// Use the environment variable `HF_XET_SHARD_CHUNK_INDEX_TABLE_MAX_SIZE` to set this value.
38    ref chunk_index_table_max_size: usize = 64 * 1024 * 1024;
39
40    /// Subdirectory name for the shard cache within the endpoint cache directory.
41    ///
42    /// The default value is "shard-cache".
43    ///
44    /// Use the environment variable `HF_XET_SHARD_CACHE_SUBDIR` to set this value.
45    ref cache_subdir: String = "shard-cache".to_string();
46
47});