use crate::error::Result;
use crate::integrity::Checksum;
use fjall::{Config, Keyspace, PartitionHandle};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
/// Record stored in the checksum partition for a single file path.
///
/// Values of this type are written and read with `bincode` by
/// `ChecksumDatabase`. bincode encodes struct fields positionally, so the
/// field order below is part of the stored layout and must not be reordered.
#[derive(Clone, Debug, Deserialize, Serialize)]
struct ChecksumEntry {
    /// Seconds component of the file's modification time, as produced by
    /// `system_time_to_parts`.
    mtime_secs: i64,
    /// Sub-second nanoseconds component of the file's modification time.
    mtime_nanos: i32,
    /// File size supplied by the caller when the entry was stored
    /// (presumably in bytes — confirm against callers).
    size: u64,
    /// Tag naming the checksum variant: `"fast"` or `"cryptographic"`.
    checksum_type: String,
    /// Raw checksum bytes.
    checksum: Vec<u8>,
    /// Seconds since the Unix epoch at which the entry was written.
    updated_at: i64,
}
/// Persistent cache of file checksums backed by a fjall keyspace.
///
/// Entries are keyed by file path and validated against the file's
/// modification time and size on lookup.
#[allow(dead_code)]
pub struct ChecksumDatabase {
    /// Keyspace handle the partition was opened from. It is not read again in
    /// this file. NOTE(review): presumably retained so the keyspace stays
    /// open for the lifetime of the database — confirm.
    keyspace: Keyspace,
    /// Partition holding the serialized checksum entries.
    partition: PartitionHandle,
}
#[allow(dead_code)]
impl ChecksumDatabase {
    /// Directory name, relative to the destination root, holding the database.
    const DB_DIR: &'static str = ".sy-checksums";
    /// Name of the partition in which checksum entries are stored.
    const PARTITION_NAME: &'static str = "checksums";

    /// Opens the checksum database located at `<dest_path>/.sy-checksums`.
    ///
    /// # Errors
    ///
    /// Returns an error if the keyspace or the partition cannot be opened.
    pub fn open(dest_path: &Path) -> Result<Self> {
        let db_path = dest_path.join(Self::DB_DIR);
        let keyspace = Config::new(&db_path).open()?;
        let partition = keyspace.open_partition(Self::PARTITION_NAME, Default::default())?;
        Ok(Self {
            keyspace,
            partition,
        })
    }

    /// Converts a path into the byte key used in the partition.
    ///
    /// The conversion is lossy: any non-UTF-8 sequence in the path is replaced
    /// by U+FFFD, so two distinct non-UTF-8 paths may map to the same key.
    fn path_to_key(path: &Path) -> Vec<u8> {
        path.to_string_lossy().as_bytes().to_vec()
    }

    /// Decodes a stored entry, labelling any failure with `label` (the path or
    /// key the bytes belong to) so the error message identifies the bad record.
    fn decode_entry(bytes: &[u8], label: impl std::fmt::Display) -> Result<ChecksumEntry> {
        let entry: ChecksumEntry = bincode::deserialize(bytes).map_err(|e| {
            crate::error::SyncError::Database(format!(
                "Failed to deserialize checksum entry for {}: {}",
                label, e
            ))
        })?;
        Ok(entry)
    }

    /// Looks up the cached checksum for `path`.
    ///
    /// Returns `Ok(Some(_))` only when an entry exists for `path`, its stored
    /// modification time and size equal `mtime` and `size`, and its stored
    /// type tag equals `checksum_type` (`"fast"` or `"cryptographic"`).
    /// Every other case — no entry, stale metadata, different or unknown
    /// type — yields `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Returns an error if the partition read fails or the stored bytes
    /// cannot be deserialized.
    pub fn get_checksum(
        &self,
        path: &Path,
        mtime: SystemTime,
        size: u64,
        checksum_type: &str,
    ) -> Result<Option<Checksum>> {
        let key = Self::path_to_key(path);
        let (mtime_secs, mtime_nanos) = system_time_to_parts(mtime);

        let value = match self.partition.get(&key)? {
            Some(v) => v,
            None => {
                tracing::debug!("Cache miss for {}", path.display());
                return Ok(None);
            }
        };
        let entry = Self::decode_entry(&value, path.display())?;

        // A cached checksum is only trusted while the file's metadata is unchanged.
        if entry.mtime_secs != mtime_secs || entry.mtime_nanos != mtime_nanos || entry.size != size
        {
            tracing::debug!("Metadata mismatch for {}", path.display());
            return Ok(None);
        }

        if entry.checksum_type != checksum_type {
            tracing::debug!(
                "Checksum type mismatch for {}: expected {}, got {}",
                path.display(),
                checksum_type,
                entry.checksum_type
            );
            return Ok(None);
        }

        let checksum = match entry.checksum_type.as_str() {
            "fast" => Checksum::Fast(entry.checksum),
            "cryptographic" => Checksum::Cryptographic(entry.checksum),
            _ => {
                tracing::warn!("Unknown checksum type in database: {}", entry.checksum_type);
                return Ok(None);
            }
        };

        tracing::debug!("Cache hit for {}", path.display());
        Ok(Some(checksum))
    }

    /// Stores `checksum` for `path`, replacing any entry already present
    /// under the same key.
    ///
    /// `mtime` and `size` are recorded alongside the checksum and later used
    /// by [`Self::get_checksum`] to detect stale entries. `Checksum::None` is
    /// not stored; the call returns `Ok(())` without touching the database.
    ///
    /// # Errors
    ///
    /// Returns an error if the entry cannot be serialized or written.
    pub fn store_checksum(
        &self,
        path: &Path,
        mtime: SystemTime,
        size: u64,
        checksum: &Checksum,
    ) -> Result<()> {
        // Decide first whether there is anything to store, so the `None`
        // case does no further work.
        let (checksum_type, checksum_blob) = match checksum {
            Checksum::None => return Ok(()),
            Checksum::Fast(bytes) => ("fast", bytes.clone()),
            Checksum::Cryptographic(bytes) => ("cryptographic", bytes.clone()),
        };

        let (mtime_secs, mtime_nanos) = system_time_to_parts(mtime);

        // A system clock set before the Unix epoch must not abort the caller:
        // fall back to 0 instead of panicking. The seconds value is clamped
        // so the conversion to `i64` cannot wrap.
        let updated_at = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map(|since_epoch| since_epoch.as_secs().min(i64::MAX as u64) as i64)
            .unwrap_or(0);

        let entry = ChecksumEntry {
            mtime_secs,
            mtime_nanos,
            size,
            checksum_type: checksum_type.to_string(),
            checksum: checksum_blob,
            updated_at,
        };

        let key = Self::path_to_key(path);
        let value = bincode::serialize(&entry)?;
        self.partition.insert(&key, &value)?;

        tracing::debug!("Stored checksum for {}", path.display());
        Ok(())
    }

    /// Removes every entry from the database.
    ///
    /// # Errors
    ///
    /// Returns an error if iterating the partition or removing a key fails.
    pub fn clear(&self) -> Result<()> {
        // Collect the keys before removing anything, so removal never runs
        // while the partition iterator is still in use.
        let keys: Vec<_> = self
            .partition
            .iter()
            .map(|item| item.map(|(k, _)| k.to_vec()))
            .collect::<std::result::Result<_, _>>()?;

        for key in keys {
            self.partition.remove(&key)?;
        }

        tracing::info!("Cleared checksum database");
        Ok(())
    }

    /// Deletes every entry whose path is not contained in `existing_files`
    /// and returns the number of entries removed.
    ///
    /// Keys are decoded back into paths with a lossy UTF-8 conversion before
    /// being compared against `existing_files`.
    ///
    /// # Errors
    ///
    /// Returns an error if iterating the partition or removing a key fails.
    pub fn prune(&self, existing_files: &HashSet<PathBuf>) -> Result<usize> {
        let mut to_delete = Vec::new();

        for item in self.partition.iter() {
            let (key, _) = item?;
            let path_str = String::from_utf8_lossy(&key);
            let path = PathBuf::from(path_str.as_ref());
            if !existing_files.contains(&path) {
                to_delete.push(key.to_vec());
            }
        }

        let deleted_count = to_delete.len();
        for key in to_delete {
            self.partition.remove(&key)?;
        }

        if deleted_count > 0 {
            tracing::info!(
                "Pruned {} stale entries from checksum database",
                deleted_count
            );
        }

        Ok(deleted_count)
    }

    /// Counts the stored entries, in total and per checksum type.
    ///
    /// Entries with an unrecognised type tag are included in the total but
    /// in neither per-type counter.
    ///
    /// # Errors
    ///
    /// Returns an error if iterating the partition fails or any stored entry
    /// cannot be deserialized.
    pub fn stats(&self) -> Result<ChecksumDbStats> {
        let mut total_entries = 0;
        let mut fast_count = 0;
        let mut crypto_count = 0;

        for item in self.partition.iter() {
            let (key, value) = item?;
            let entry = Self::decode_entry(&value, String::from_utf8_lossy(&key))?;

            total_entries += 1;
            match entry.checksum_type.as_str() {
                "fast" => fast_count += 1,
                "cryptographic" => crypto_count += 1,
                _ => {}
            }
        }

        Ok(ChecksumDbStats {
            total_entries,
            fast_checksums: fast_count,
            cryptographic_checksums: crypto_count,
        })
    }
}
/// Summary counters describing the contents of the checksum database.
///
/// `Default` yields all-zero counters, and `PartialEq`/`Eq` allow whole
/// snapshots to be compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[allow(dead_code)]
pub struct ChecksumDbStats {
    /// Total number of stored entries, regardless of checksum type.
    pub total_entries: usize,
    /// Number of entries whose type tag is `"fast"`.
    pub fast_checksums: usize,
    /// Number of entries whose type tag is `"cryptographic"`.
    pub cryptographic_checksums: usize,
}
/// Splits a [`SystemTime`] into `(seconds, nanoseconds)` relative to the
/// Unix epoch.
///
/// For times at or after the epoch the result is the elapsed whole seconds
/// and the sub-second nanoseconds. For times before the epoch the seconds
/// component is negative and the nanoseconds component is normalised into
/// `0..1_000_000_000`, so that `seconds + nanoseconds / 1e9` is the signed
/// offset from the epoch. Distinct instants therefore map to distinct pairs;
/// in particular, pre-epoch times no longer all collapse to `(0, 0)`.
///
/// Second counts that do not fit in an `i64` are clamped.
#[allow(dead_code)]
fn system_time_to_parts(time: SystemTime) -> (i64, i32) {
    match time.duration_since(UNIX_EPOCH) {
        Ok(after) => (
            after.as_secs().min(i64::MAX as u64) as i64,
            // `subsec_nanos()` is always below 1_000_000_000, which fits in `i32`.
            after.subsec_nanos() as i32,
        ),
        Err(err) => {
            // `err.duration()` is how far `time` lies *before* the epoch.
            let before = err.duration();
            let secs = before.as_secs().min(i64::MAX as u64) as i64;
            let nanos = before.subsec_nanos() as i32;
            if nanos == 0 {
                (-secs, 0)
            } else {
                // Borrow one second so the nanosecond part stays non-negative.
                (-secs - 1, 1_000_000_000 - nanos)
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// File size used by tests that do not care about the exact value.
    const SIZE: u64 = 1024;

    /// Opens a database inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned alongside the database so the
    /// directory stays on disk for as long as the test holds it.
    fn scratch_db() -> (TempDir, ChecksumDatabase) {
        let dir = TempDir::new().unwrap();
        let db = ChecksumDatabase::open(dir.path()).unwrap();
        (dir, db)
    }

    /// Path shared by the single-file tests.
    fn sample_path() -> PathBuf {
        PathBuf::from("test/file.txt")
    }

    /// Opening a database creates its directory and starts out empty.
    #[test]
    fn test_open_database() {
        let (dir, db) = scratch_db();

        assert!(dir.path().join(ChecksumDatabase::DB_DIR).exists());
        assert_eq!(db.stats().unwrap().total_entries, 0);
    }

    /// A stored fast checksum is returned for matching metadata and counted.
    #[test]
    fn test_store_and_retrieve_checksum() {
        let (_dir, db) = scratch_db();
        let path = sample_path();
        let mtime = SystemTime::now();
        let checksum = Checksum::Fast(vec![1, 2, 3, 4, 5, 6, 7, 8]);

        db.store_checksum(&path, mtime, SIZE, &checksum).unwrap();

        let retrieved = db.get_checksum(&path, mtime, SIZE, "fast").unwrap();
        assert_eq!(retrieved, Some(checksum));

        let stats = db.stats().unwrap();
        assert_eq!(stats.total_entries, 1);
        assert_eq!(stats.fast_checksums, 1);
    }

    /// A different modification time invalidates the cached entry.
    #[test]
    fn test_cache_miss_on_mtime_change() {
        let (_dir, db) = scratch_db();
        let path = sample_path();
        let stored_mtime = SystemTime::now();
        let later_mtime = stored_mtime + std::time::Duration::from_secs(10);
        let checksum = Checksum::Fast(vec![1, 2, 3, 4]);

        db.store_checksum(&path, stored_mtime, SIZE, &checksum)
            .unwrap();

        let retrieved = db.get_checksum(&path, later_mtime, SIZE, "fast").unwrap();
        assert!(retrieved.is_none());
    }

    /// A different file size invalidates the cached entry.
    #[test]
    fn test_cache_miss_on_size_change() {
        let (_dir, db) = scratch_db();
        let path = sample_path();
        let mtime = SystemTime::now();
        let checksum = Checksum::Fast(vec![1, 2, 3, 4]);

        db.store_checksum(&path, mtime, 1024, &checksum).unwrap();

        let retrieved = db.get_checksum(&path, mtime, 2048, "fast").unwrap();
        assert!(retrieved.is_none());
    }

    /// `clear` removes every stored entry.
    #[test]
    fn test_clear_database() {
        let (_dir, db) = scratch_db();
        let checksum = Checksum::Fast(vec![1, 2, 3, 4]);

        db.store_checksum(&sample_path(), SystemTime::now(), SIZE, &checksum)
            .unwrap();
        assert_eq!(db.stats().unwrap().total_entries, 1);

        db.clear().unwrap();
        assert_eq!(db.stats().unwrap().total_entries, 0);
    }

    /// `prune` drops exactly the entries missing from the existing-file set.
    #[test]
    fn test_prune_stale_entries() {
        let (_dir, db) = scratch_db();
        let mtime = SystemTime::now();
        let checksum = Checksum::Fast(vec![1, 2, 3, 4]);

        for name in ["file1.txt", "file2.txt", "file3.txt"] {
            db.store_checksum(Path::new(name), mtime, SIZE, &checksum)
                .unwrap();
        }
        assert_eq!(db.stats().unwrap().total_entries, 3);

        let existing: HashSet<PathBuf> = ["file1.txt", "file2.txt"]
            .iter()
            .map(|name| PathBuf::from(*name))
            .collect();

        let pruned = db.prune(&existing).unwrap();
        assert_eq!(pruned, 1);
        assert_eq!(db.stats().unwrap().total_entries, 2);
    }

    /// Cryptographic checksums round-trip and are not served as "fast".
    #[test]
    fn test_cryptographic_checksum_storage() {
        let (_dir, db) = scratch_db();
        let path = sample_path();
        let mtime = SystemTime::now();
        let checksum = Checksum::Cryptographic(vec![0xde, 0xad, 0xbe, 0xef]);

        db.store_checksum(&path, mtime, SIZE, &checksum).unwrap();

        let retrieved = db
            .get_checksum(&path, mtime, SIZE, "cryptographic")
            .unwrap();
        assert_eq!(retrieved, Some(checksum));

        let retrieved_wrong = db.get_checksum(&path, mtime, SIZE, "fast").unwrap();
        assert!(retrieved_wrong.is_none());

        let stats = db.stats().unwrap();
        assert_eq!(stats.cryptographic_checksums, 1);
        assert_eq!(stats.fast_checksums, 0);
    }

    /// Storing twice under the same path overwrites instead of duplicating.
    #[test]
    fn test_update_existing_checksum() {
        let (_dir, db) = scratch_db();
        let path = sample_path();
        let mtime = SystemTime::now();
        let first = Checksum::Fast(vec![1, 2, 3, 4]);
        let second = Checksum::Fast(vec![5, 6, 7, 8]);

        db.store_checksum(&path, mtime, SIZE, &first).unwrap();
        assert_eq!(db.stats().unwrap().total_entries, 1);

        db.store_checksum(&path, mtime, SIZE, &second).unwrap();
        assert_eq!(db.stats().unwrap().total_entries, 1);

        let retrieved = db.get_checksum(&path, mtime, SIZE, "fast").unwrap();
        assert_eq!(retrieved.unwrap(), second);
    }
}