#![allow(dead_code)]
use std::collections::HashSet;
use std::io::{self, Write};
const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
#[must_use]
pub fn fnv1a_64(data: &[u8]) -> u64 {
let mut hash = FNV_OFFSET;
for &byte in data {
hash ^= u64::from(byte);
hash = hash.wrapping_mul(FNV_PRIME);
}
hash
}
#[derive(Debug, Clone, Default)]
pub struct DedupStats {
pub total_writes: u64,
pub dedup_writes: u64,
pub total_bytes: u64,
pub written_bytes: u64,
}
impl DedupStats {
#[must_use]
pub fn dedup_ratio(&self) -> f64 {
if self.total_writes == 0 {
0.0
} else {
self.dedup_writes as f64 / self.total_writes as f64
}
}
#[must_use]
pub fn bytes_saved(&self) -> u64 {
self.total_bytes.saturating_sub(self.written_bytes)
}
}
pub struct DedupWriter<W: Write> {
inner: W,
seen: HashSet<u64>,
stats: DedupStats,
}
impl<W: Write> DedupWriter<W> {
pub fn new(inner: W) -> Self {
Self {
inner,
seen: HashSet::new(),
stats: DedupStats::default(),
}
}
#[must_use]
pub fn stats(&self) -> &DedupStats {
&self.stats
}
pub fn into_inner(self) -> (W, DedupStats) {
(self.inner, self.stats)
}
pub fn reset(&mut self) {
self.seen.clear();
}
pub fn reset_all(&mut self) {
self.seen.clear();
self.stats = DedupStats::default();
}
}
impl<W: Write> Write for DedupWriter<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
let len = buf.len();
self.stats.total_writes += 1;
self.stats.total_bytes += len as u64;
let hash = fnv1a_64(buf);
if self.seen.contains(&hash) {
self.stats.dedup_writes += 1;
return Ok(len);
}
self.seen.insert(hash);
let n = self.inner.write(buf)?;
self.stats.written_bytes += n as u64;
Ok(n)
}
fn flush(&mut self) -> io::Result<()> {
self.inner.flush()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
#[test]
fn test_fnv1a_deterministic() {
let h1 = fnv1a_64(b"hello world");
let h2 = fnv1a_64(b"hello world");
assert_eq!(h1, h2);
}
#[test]
fn test_fnv1a_different_data() {
let h1 = fnv1a_64(b"hello");
let h2 = fnv1a_64(b"world");
assert_ne!(h1, h2);
}
#[test]
fn test_dedup_skips_duplicate() {
let mut buf = Vec::new();
let mut writer = DedupWriter::new(&mut buf);
writer.write_all(b"hello").expect("write should succeed");
writer.write_all(b"hello").expect("write should succeed");
writer.write_all(b"world").expect("write should succeed");
assert_eq!(writer.stats().total_writes, 3);
assert_eq!(writer.stats().dedup_writes, 1);
assert_eq!(&buf, b"helloworld");
}
#[test]
fn test_stats_dedup_ratio() {
let mut buf = Vec::new();
let mut writer = DedupWriter::new(&mut buf);
writer.write_all(b"x").expect("write should succeed");
writer.write_all(b"x").expect("write should succeed");
writer.write_all(b"x").expect("write should succeed");
let stats = writer.stats();
assert_eq!(stats.total_writes, 3);
assert_eq!(stats.dedup_writes, 2);
assert!((stats.dedup_ratio() - 2.0 / 3.0).abs() < 1e-9);
}
#[test]
fn test_bytes_saved() {
let mut buf = Vec::new();
let mut writer = DedupWriter::new(&mut buf);
writer.write_all(b"hello").expect("write should succeed");
writer.write_all(b"hello").expect("write should succeed");
let stats = writer.stats().clone();
assert_eq!(stats.bytes_saved(), 5);
}
#[test]
fn test_reset_clears_seen_set() {
let mut buf = Vec::new();
let mut writer = DedupWriter::new(&mut buf);
writer.write_all(b"data").expect("write should succeed");
writer.reset();
writer.write_all(b"data").expect("write should succeed");
assert_eq!(writer.stats().dedup_writes, 0);
}
#[test]
fn test_reset_all_clears_stats() {
let mut buf = Vec::new();
let mut writer = DedupWriter::new(&mut buf);
writer.write_all(b"abc").expect("write should succeed");
writer.write_all(b"abc").expect("write should succeed");
assert_eq!(writer.stats().total_writes, 2);
writer.reset_all();
assert_eq!(writer.stats().total_writes, 0);
assert_eq!(writer.stats().dedup_writes, 0);
assert_eq!(writer.stats().total_bytes, 0);
assert_eq!(writer.stats().written_bytes, 0);
writer.write_all(b"abc").expect("write should succeed");
assert_eq!(writer.stats().dedup_writes, 0);
}
#[test]
fn test_into_inner_returns_inner_and_stats() {
let buf = Vec::new();
let mut writer = DedupWriter::new(buf);
writer.write_all(b"hello").expect("write should succeed");
writer.write_all(b"world").expect("write should succeed");
writer.write_all(b"hello").expect("write should succeed");
let (inner, stats) = writer.into_inner();
assert_eq!(&inner, b"helloworld");
assert_eq!(stats.total_writes, 3);
assert_eq!(stats.dedup_writes, 1);
}
#[test]
fn test_dedup_ratio_no_writes() {
let buf = Vec::new();
let writer = DedupWriter::new(buf);
assert!((writer.stats().dedup_ratio() - 0.0).abs() < f64::EPSILON);
}
#[test]
fn test_flush_propagates() {
use std::io::Write;
let buf = Vec::new();
let mut writer = DedupWriter::new(buf);
assert!(writer.flush().is_ok());
}
#[test]
fn test_unique_blocks_all_written() {
let mut buf = Vec::new();
let mut writer = DedupWriter::new(&mut buf);
for i in 0u8..8 {
let block = vec![i; 4];
writer.write_all(&block).expect("write should succeed");
}
assert_eq!(writer.stats().total_writes, 8);
assert_eq!(writer.stats().dedup_writes, 0);
assert_eq!(writer.stats().written_bytes, 32);
assert_eq!(writer.stats().bytes_saved(), 0);
}
}