xet_runtime/config/groups/deduplication.rs
1crate::config_group!({
2 /// Number of ranges to use when estimating fragmentation
3 ///
4 /// The default value is 128.
5 ///
6 /// Use the environment variable `HF_XET_DEDUPLICATION_NRANGES_IN_STREAMING_FRAGMENTATION_ESTIMATOR` to set this value.
7 ref nranges_in_streaming_fragmentation_estimator: usize = 128;
8
9 /// Minimum number of chunks per range. Used to control fragmentation
10 /// This targets an average of 1MB per range.
11 /// The hysteresis factor multiplied by the target Chunks Per Range (CPR) controls
12 /// the low end of the hysteresis range. Basically, dedupe will stop
13 /// when CPR drops below hysteresis * target_cpr, and will start again when
14 /// CPR increases above target CPR.
15 ///
16 /// The default value is 0.5.
17 ///
18 /// Use the environment variable `HF_XET_DEDUPLICATION_MIN_N_CHUNKS_PER_RANGE_HYSTERESIS_FACTOR` to set this value.
19 ref min_n_chunks_per_range_hysteresis_factor: f32 = 0.5;
20 /// Minimum number of chunks per range.
21 ///
22 /// The default value is 8.0.
23 ///
24 /// Use the environment variable `HF_XET_DEDUPLICATION_MIN_N_CHUNKS_PER_RANGE` to set this value.
25 ref min_n_chunks_per_range: f32 = 8.0;
26
27 /// Whether to enable global deduplication queries to the server.
28 /// When enabled, the system will query the server for deduplication shards
29 /// based on chunk hashes to enable cross-repository deduplication.
30 ///
31 /// The default value is true.
32 ///
33 /// Use the environment variable `HF_XET_DEDUPLICATION_GLOBAL_DEDUP_QUERY_ENABLED` to set this value.
34 ref global_dedup_query_enabled: bool = true;
35});