use std::collections::VecDeque;
use xet_runtime::core::xet_config;
/// Rolling estimate of how many chunks each recent range contains, used to
/// decide whether the next range may be deduplicated.
///
/// The estimator keeps a bounded window of per-range chunk counts together
/// with their running sum, plus the state of a two-level (hysteresis)
/// threshold on the resulting average.
pub(crate) struct DefragPrevention {
    /// Chunk counts of the most recently recorded ranges; new ranges are
    /// pushed at the back and the oldest is evicted from the front.
    rolling_last_nranges: VecDeque<usize>,

    /// Running sum of every entry currently held in `rolling_last_nranges`.
    rolling_nranges_chunks: usize,

    /// Selects which threshold the average is compared against: when `true`
    /// the target is `min_chunks_per_range` scaled by the hysteresis factor,
    /// when `false` it is `min_chunks_per_range` itself.
    defrag_at_low_threshold: bool,

    /// Base threshold for the rolling chunks-per-range average.
    min_chunks_per_range: f32,

    /// Multiplier applied to `min_chunks_per_range` while
    /// `defrag_at_low_threshold` is set ("historesis" is this file's
    /// spelling of "hysteresis").
    min_chunks_per_range_historesis_factor: f32,
}
impl DefragPrevention {
    /// Adds `nchunks` to the most recently recorded range.
    ///
    /// The running chunk total is updated by the same amount. If no range is
    /// currently held in the window this is a no-op.
    pub(crate) fn increment_last_range_in_fragmentation_estimate(&mut self, nchunks: usize) {
        if let Some(last) = self.rolling_last_nranges.back_mut() {
            *last += nchunks;
            self.rolling_nranges_chunks += nchunks;
        }
    }

    /// Records a new range of `nchunks` chunks at the back of the window.
    ///
    /// If the window then holds more ranges than the configured
    /// `nranges_in_streaming_fragmentation_estimator`, the oldest range is
    /// evicted and its chunks are removed from the running total.
    pub(crate) fn add_range_to_fragmentation_estimate(&mut self, nchunks: usize) {
        self.rolling_last_nranges.push_back(nchunks);
        self.rolling_nranges_chunks += nchunks;

        let window = xet_config().deduplication.nranges_in_streaming_fragmentation_estimator;
        if self.rolling_last_nranges.len() > window {
            // The deque cannot be empty here (we just pushed), so this always
            // evicts exactly one entry; `if let` merely avoids an `unwrap`.
            if let Some(evicted) = self.rolling_last_nranges.pop_front() {
                self.rolling_nranges_chunks -= evicted;
            }
        }
    }

    /// Returns the average number of chunks per range over the window.
    ///
    /// Returns `None` until the window holds at least the configured
    /// `nranges_in_streaming_fragmentation_estimator` ranges. It also returns
    /// `None` when the window is empty (which happens when the configured
    /// size is zero), so that the result is never the `NaN` produced by
    /// dividing zero by zero.
    pub(crate) fn rolling_chunks_per_range(&self) -> Option<f32> {
        let n_ranges = self.rolling_last_nranges.len();
        let window = xet_config().deduplication.nranges_in_streaming_fragmentation_estimator;

        if n_ranges == 0 || n_ranges < window {
            return None;
        }
        Some(self.rolling_nranges_chunks as f32 / n_ranges as f32)
    }

    /// Decides whether the next range, of `dedup_range_size` chunks, may be
    /// deduplicated.
    ///
    /// Returns `true` when no rolling average is available yet. Otherwise the
    /// average is compared against a target threshold:
    ///
    /// * `min_chunks_per_range * min_chunks_per_range_historesis_factor`
    ///   while `defrag_at_low_threshold` is set,
    /// * `min_chunks_per_range` otherwise.
    ///
    /// When the average is below the target *and* `dedup_range_size` is
    /// smaller than the average, deduplication is refused (`false`) and the
    /// full `min_chunks_per_range` threshold is selected for later calls.
    /// When the average is at or above the target, the scaled threshold is
    /// selected for later calls. Every other case returns `true` and leaves
    /// the threshold selection unchanged.
    pub(crate) fn allow_dedup_on_next_range(&mut self, dedup_range_size: usize) -> bool {
        let Some(chunks_per_range) = self.rolling_chunks_per_range() else {
            return true;
        };

        let target_cpr = if self.defrag_at_low_threshold {
            self.min_chunks_per_range * self.min_chunks_per_range_historesis_factor
        } else {
            self.min_chunks_per_range
        };

        if chunks_per_range < target_cpr {
            // The average is below target. Only refuse when the candidate
            // range is itself smaller than the current average.
            if (dedup_range_size as f32) < chunks_per_range {
                // After a refusal, compare against the unscaled threshold.
                self.defrag_at_low_threshold = false;
                return false;
            }
        } else {
            // The average met the target: switch back to the scaled threshold.
            self.defrag_at_low_threshold = true;
        }

        true
    }
}
impl Default for DefragPrevention {
    /// Builds an empty estimator whose window capacity and thresholds are
    /// taken from the `deduplication` section of `xet_config()`.
    ///
    /// The estimator starts with no recorded ranges, a zero chunk total, and
    /// `defrag_at_low_threshold` set.
    fn default() -> Self {
        // Pre-size the window to the configured number of tracked ranges.
        let window_capacity = xet_config().deduplication.nranges_in_streaming_fragmentation_estimator;
        let min_cpr = xet_config().deduplication.min_n_chunks_per_range;
        let hysteresis_factor = xet_config().deduplication.min_n_chunks_per_range_hysteresis_factor;

        Self {
            rolling_last_nranges: VecDeque::with_capacity(window_capacity),
            rolling_nranges_chunks: 0,
            defrag_at_low_threshold: true,
            min_chunks_per_range: min_cpr,
            min_chunks_per_range_historesis_factor: hysteresis_factor,
        }
    }
}