xet_runtime/config/groups/shard.rs
1use crate::utils::ByteSize;
2
// Shard-related configuration settings. Each `ref <name>: <type> = <default>;` entry
// below declares one setting together with its default value; per the doc comments, every
// setting can be overridden through the matching `HF_XET_SHARD_*` environment variable.
// NOTE(review): the expansion of `config_group!` is defined elsewhere — confirm how the
// doc comments and default expressions are consumed before editing them by hand.
3crate::config_group!({
4
5 /// The target shard size in bytes.
6 ///
7 /// The default value is 67108864.
8 ///
9 /// Use the environment variable `HF_XET_SHARD_TARGET_SIZE` to set this value.
 // 64 * 1024 * 1024 = 67108864 bytes, i.e. 64 MiB.
10 ref target_size: u64 = 64 * 1024 * 1024;
11
12 /// Maximum shard size; small shards are aggregated until they are at most this.
13 ///
14 /// The default value is 67108864.
15 ///
16 /// Use the environment variable `HF_XET_SHARD_MAX_TARGET_SIZE` to set this value.
 // Same default as `target_size` (64 MiB).
 // NOTE(review): the unit is not stated above; presumably bytes like `target_size` — confirm.
17 ref max_target_size: u64 = 64 * 1024 * 1024;
18
19 /// The (soft) maximum size in bytes of the shard cache. Default is 16 GB.
20 ///
21 /// As a rough calculation, a cache of size X will allow for dedup against data
22 /// of size 1000 * X. The default would allow a 16 TB repo to be deduped effectively.
23 ///
24 /// Note the cache is pruned to below this value at the beginning of a session,
25 /// but during a single session new shards may be added such that this limit is exceeded.
26 ///
27 /// The default value is 16gb.
28 ///
29 /// Use the environment variable `HF_XET_SHARD_CACHE_SIZE_LIMIT` to set this value.
 // Unlike the plain integer settings, this one is a `ByteSize` built from the string "16gb".
30 ref cache_size_limit : ByteSize = ByteSize::from("16gb");
31
32 /// The maximum size of the chunk index table that's stored in memory. After this,
33 /// no new chunks are loaded for deduplication.
34 ///
35 /// The default value is 67108864.
36 ///
37 /// Use the environment variable `HF_XET_SHARD_CHUNK_INDEX_TABLE_MAX_SIZE` to set this value.
 // NOTE(review): the unit of this limit (table entries vs. bytes) is not stated here —
 // confirm against the code that reads this setting.
38 ref chunk_index_table_max_size: usize = 64 * 1024 * 1024;
39
40 /// Subdirectory name for shard cache within the endpoint cache directory.
41 ///
42 /// The default value is "shard-cache".
43 ///
44 /// Use the environment variable `HF_XET_SHARD_CACHE_SUBDIR` to set this value.
 // This is a directory name only; per the doc comment it is placed under the
 // endpoint cache directory, which is determined elsewhere.
45 ref cache_subdir: String = "shard-cache".to_string();
46
47});