Skip to main content

xet_runtime/config/groups/
deduplication.rs

1crate::config_group!({
2    /// Number of ranges to use when estimating fragmentation
3    ///
4    /// The default value is 128.
5    ///
6    /// Use the environment variable `HF_XET_DEDUPLICATION_NRANGES_IN_STREAMING_FRAGMENTATION_ESTIMATOR` to set this value.
7    ref nranges_in_streaming_fragmentation_estimator: usize = 128;
8
9    /// Minimum number of chunks per range. Used to control fragmentation
10    /// This targets an average of 1MB per range.
11    /// The hysteresis factor multiplied by the target Chunks Per Range (CPR) controls
12    /// the low end of the hysteresis range. Basically, dedupe will stop
13    /// when CPR drops below hysteresis * target_cpr, and will start again when
14    /// CPR increases above target CPR.
15    ///
16    /// The default value is 0.5.
17    ///
18    /// Use the environment variable `HF_XET_DEDUPLICATION_MIN_N_CHUNKS_PER_RANGE_HYSTERESIS_FACTOR` to set this value.
19    ref min_n_chunks_per_range_hysteresis_factor: f32 = 0.5;
20    /// Minimum number of chunks per range.
21    ///
22    /// The default value is 8.0.
23    ///
24    /// Use the environment variable `HF_XET_DEDUPLICATION_MIN_N_CHUNKS_PER_RANGE` to set this value.
25    ref min_n_chunks_per_range: f32 = 8.0;
26
27    /// Whether to enable global deduplication queries to the server.
28    /// When enabled, the system will query the server for deduplication shards
29    /// based on chunk hashes to enable cross-repository deduplication.
30    ///
31    /// The default value is true.
32    ///
33    /// Use the environment variable `HF_XET_DEDUPLICATION_GLOBAL_DEDUP_QUERY_ENABLED` to set this value.
34    ref global_dedup_query_enabled: bool = true;
35});