use alloc::collections::BTreeMap;
use alloc::vec::Vec;
use lazy_static::lazy_static;
use spin::Mutex;
#[derive(Debug, Clone)]
pub struct FastDedupEntry {
pub checksum: [u8; 32],
pub block_ptr: u64,
pub refcount: u32,
pub last_access: u64,
pub access_count: u32,
}
lazy_static! {
static ref FAST_DEDUP: Mutex<FastDedupTable> = Mutex::new(FastDedupTable::new());
}
pub struct FastDedupTable {
table: BTreeMap<[u8; 32], FastDedupEntry>,
max_entries: usize,
timestamp: u64,
hits: u64,
misses: u64,
evictions: u64,
}
impl Default for FastDedupTable {
fn default() -> Self {
Self::new()
}
}
impl FastDedupTable {
const DEFAULT_MAX_ENTRIES: usize = 65536;
pub fn new() -> Self {
Self {
table: BTreeMap::new(),
max_entries: Self::DEFAULT_MAX_ENTRIES,
timestamp: 0,
hits: 0,
misses: 0,
evictions: 0,
}
}
pub fn with_capacity(max_entries: usize) -> Self {
Self {
table: BTreeMap::new(),
max_entries,
timestamp: 0,
hits: 0,
misses: 0,
evictions: 0,
}
}
pub fn lookup(&mut self, checksum: &[u8; 32]) -> Option<u64> {
if let Some(entry) = self.table.get_mut(checksum) {
self.timestamp += 1;
entry.last_access = self.timestamp;
entry.access_count += 1;
self.hits += 1;
Some(entry.block_ptr)
} else {
self.misses += 1;
None
}
}
pub fn insert(&mut self, checksum: [u8; 32], block_ptr: u64) -> bool {
if let Some(entry) = self.table.get_mut(&checksum) {
self.timestamp += 1;
entry.last_access = self.timestamp;
entry.refcount += 1;
return true;
}
if self.table.len() >= self.max_entries {
self.evict_lru();
}
self.timestamp += 1;
let entry = FastDedupEntry {
checksum,
block_ptr,
refcount: 1,
last_access: self.timestamp,
access_count: 1,
};
self.table.insert(checksum, entry);
true
}
pub fn incref(&mut self, checksum: &[u8; 32]) {
if let Some(entry) = self.table.get_mut(checksum) {
entry.refcount += 1;
}
}
pub fn decref(&mut self, checksum: &[u8; 32]) -> bool {
if let Some(entry) = self.table.get_mut(checksum) {
entry.refcount -= 1;
if entry.refcount == 0 {
self.table.remove(checksum);
return false;
}
return true;
}
false
}
fn evict_lru(&mut self) {
let oldest = self
.table
.iter()
.min_by_key(|(_, entry)| entry.last_access)
.map(|(checksum, _)| *checksum);
if let Some(checksum) = oldest {
self.table.remove(&checksum);
self.evictions += 1;
}
}
pub fn promote_hot_entries(&mut self, hot_threshold: u32) {
for entry in self.table.values_mut() {
if entry.access_count >= hot_threshold {
self.timestamp += 1;
entry.last_access = self.timestamp;
}
}
}
pub fn get_stats(&self) -> (u64, u64, usize, u64, f64) {
let total = self.hits + self.misses;
let hit_rate = if total > 0 {
(self.hits as f64 / total as f64) * 100.0
} else {
0.0
};
(
self.hits,
self.misses,
self.table.len(),
self.evictions,
hit_rate,
)
}
pub fn clear(&mut self) {
self.table.clear();
self.timestamp = 0;
self.hits = 0;
self.misses = 0;
self.evictions = 0;
}
pub fn dedup_ratio(&self) -> f64 {
if self.table.is_empty() {
return 1.0;
}
let total_refs: u32 = self.table.values().map(|e| e.refcount).sum();
let unique_blocks = self.table.len();
total_refs as f64 / unique_blocks as f64
}
}
pub struct FastDedupEngine;
impl FastDedupEngine {
pub fn init(max_entries: usize) {
let mut dedup = FAST_DEDUP.lock();
*dedup = FastDedupTable::with_capacity(max_entries);
}
pub fn lookup(checksum: &[u8; 32]) -> Option<u64> {
let mut dedup = FAST_DEDUP.lock();
dedup.lookup(checksum)
}
pub fn insert(checksum: [u8; 32], block_ptr: u64) -> bool {
let mut dedup = FAST_DEDUP.lock();
dedup.insert(checksum, block_ptr)
}
pub fn incref(checksum: &[u8; 32]) {
let mut dedup = FAST_DEDUP.lock();
dedup.incref(checksum);
}
pub fn decref(checksum: &[u8; 32]) -> bool {
let mut dedup = FAST_DEDUP.lock();
dedup.decref(checksum)
}
pub fn get_stats() -> (u64, u64, usize, u64, f64) {
let dedup = FAST_DEDUP.lock();
dedup.get_stats()
}
pub fn promote_hot(hot_threshold: u32) {
let mut dedup = FAST_DEDUP.lock();
dedup.promote_hot_entries(hot_threshold);
}
pub fn clear() {
let mut dedup = FAST_DEDUP.lock();
dedup.clear();
}
pub fn dedup_ratio() -> f64 {
let dedup = FAST_DEDUP.lock();
dedup.dedup_ratio()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_fast_dedup_insert_lookup() {
let mut dedup = FastDedupTable::new();
let checksum = [0x42u8; 32];
let block_ptr = 12345;
assert!(dedup.insert(checksum, block_ptr));
assert_eq!(dedup.lookup(&checksum), Some(block_ptr));
assert_eq!(dedup.hits, 1);
assert_eq!(dedup.misses, 0);
}
#[test]
fn test_fast_dedup_miss() {
let mut dedup = FastDedupTable::new();
let checksum = [0x42u8; 32];
assert_eq!(dedup.lookup(&checksum), None);
assert_eq!(dedup.hits, 0);
assert_eq!(dedup.misses, 1);
}
#[test]
fn test_fast_dedup_refcount() {
let mut dedup = FastDedupTable::new();
let checksum = [0x42u8; 32];
dedup.insert(checksum, 100);
dedup.incref(&checksum);
assert_eq!(
dedup
.table
.get(&checksum)
.expect("test: operation should succeed")
.refcount,
2
);
assert!(dedup.decref(&checksum)); assert_eq!(
dedup
.table
.get(&checksum)
.expect("test: operation should succeed")
.refcount,
1
);
assert!(!dedup.decref(&checksum)); assert!(!dedup.table.contains_key(&checksum));
}
#[test]
fn test_fast_dedup_eviction() {
let mut dedup = FastDedupTable::with_capacity(2);
let cs1 = [0x01u8; 32];
let cs2 = [0x02u8; 32];
let cs3 = [0x03u8; 32];
dedup.insert(cs1, 100);
dedup.insert(cs2, 200);
dedup.lookup(&cs2);
dedup.insert(cs3, 300);
assert!(!dedup.table.contains_key(&cs1)); assert!(dedup.table.contains_key(&cs2)); assert!(dedup.table.contains_key(&cs3)); assert_eq!(dedup.evictions, 1);
}
#[test]
fn test_fast_dedup_stats() {
let mut dedup = FastDedupTable::new();
let cs1 = [0x01u8; 32];
dedup.insert(cs1, 100);
dedup.lookup(&cs1); dedup.lookup(&[0x02u8; 32]);
let (hits, misses, entries, evictions, hit_rate) = dedup.get_stats();
assert_eq!(hits, 1);
assert_eq!(misses, 1);
assert_eq!(entries, 1);
assert_eq!(evictions, 0);
assert_eq!(hit_rate, 50.0); }
#[test]
fn test_dedup_ratio() {
let mut dedup = FastDedupTable::new();
let cs1 = [0x01u8; 32];
dedup.insert(cs1, 100);
dedup.incref(&cs1);
let cs2 = [0x02u8; 32];
dedup.insert(cs2, 200);
assert_eq!(dedup.dedup_ratio(), 1.5);
}
#[test]
fn test_promote_hot_entries() {
let mut dedup = FastDedupTable::new();
let cs1 = [0x01u8; 32];
dedup.insert(cs1, 100);
for _ in 0..10 {
dedup.lookup(&cs1);
}
let old_timestamp = dedup
.table
.get(&cs1)
.expect("test: operation should succeed")
.last_access;
dedup.promote_hot_entries(5);
let new_timestamp = dedup
.table
.get(&cs1)
.expect("test: operation should succeed")
.last_access;
assert!(new_timestamp > old_timestamp); }
}