use anyhow::{Context, Result};
use rusqlite::OptionalExtension;
use std::path::Path;
use super::cache::HashCache;
use super::hash::{ContentHash, FileHasher};
use crate::db::SqliteStore;
use std::sync::Arc;
/// Outcome of comparing a file's current content hash with the last hash
/// known for it (taken from the in-memory cache or from the database).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChangeType {
/// The current hash equals the previously known hash.
None,
/// No previous hash was found for the file; carries the current hash.
New(ContentHash),
/// The current hash differs from the previously known one.
Modified { old: ContentHash, new: ContentHash },
/// The path no longer exists on disk; carries the hash that was stored in
/// the database for the file.
Deleted(ContentHash),
}
/// Detects content changes of files by comparing freshly computed hashes
/// against an in-memory cache first and the database second.
pub struct ChangeDetector {
// Path-keyed cache of the most recently observed content hash per file.
cache: HashCache,
// Shared handle to the store whose `files` table holds persisted hashes.
store: Arc<SqliteStore>,
}
impl ChangeDetector {
pub fn new(store: Arc<SqliteStore>) -> Self {
Self {
cache: HashCache::new(),
store,
}
}
pub fn with_capacity(store: Arc<SqliteStore>, capacity: usize) -> Self {
Self {
cache: HashCache::with_capacity(capacity),
store,
}
}
pub async fn detect_change(&mut self, file_id: i64, path: &Path) -> Result<ChangeType> {
let current_hash = FileHasher::hash_file(path)
.with_context(|| format!("Failed to hash file: {}", path.display()))?;
if let Some(cached_hash) = self.cache.get(path) {
if *cached_hash == current_hash {
return Ok(ChangeType::None);
} else {
let old_hash = *cached_hash;
self.cache.insert(path.to_path_buf(), current_hash);
return Ok(ChangeType::Modified {
old: old_hash,
new: current_hash,
});
}
}
let db_hash = get_hash_from_db(&self.store, file_id).await?;
let change_type = match db_hash {
Some(old_hash) => {
if old_hash == current_hash {
ChangeType::None
} else {
ChangeType::Modified {
old: old_hash,
new: current_hash,
}
}
}
None => {
ChangeType::New(current_hash)
}
};
self.cache.insert(path.to_path_buf(), current_hash);
Ok(change_type)
}
pub fn cache(&self) -> &HashCache {
&self.cache
}
pub fn cache_mut(&mut self) -> &mut HashCache {
&mut self.cache
}
pub fn clear_cache(&mut self) {
self.cache.clear();
}
pub async fn detect_deletion(&self, file_id: i64, path: &Path) -> Result<Option<ChangeType>> {
if path.exists() {
return Ok(None); }
if let Some(old_hash) = get_hash_from_db(&self.store, file_id).await? {
return Ok(Some(ChangeType::Deleted(old_hash)));
}
Ok(None) }
pub async fn detect_move(
&self,
new_path: &Path,
hash: &ContentHash,
) -> Result<Option<std::path::PathBuf>> {
let hex_str = hash.to_hex().to_string();
let new_path_str = new_path.to_string_lossy().to_string();
self.store
.run(move |conn| {
let result: Option<String> = conn
.query_row(
"SELECT relpath FROM files WHERE content_hash = ?1 AND relpath != ?2 LIMIT 1",
rusqlite::params![hex_str, new_path_str],
|row| row.get(0),
)
.optional()?;
Ok(result.map(std::path::PathBuf::from))
})
.await
}
pub async fn detect_changes_batch(
&mut self,
files: &[(i64, std::path::PathBuf)],
) -> Result<Vec<(i64, ChangeType)>> {
use std::collections::HashMap;
if files.is_empty() {
return Ok(Vec::new());
}
let mut file_hashes: HashMap<i64, ContentHash> = HashMap::with_capacity(files.len());
for (file_id, path) in files {
let hash = FileHasher::hash_file(path)
.with_context(|| format!("Failed to hash file: {}", path.display()))?;
file_hashes.insert(*file_id, hash);
}
let mut cache_misses = Vec::new();
let mut results: HashMap<i64, ChangeType> = HashMap::with_capacity(files.len());
for (file_id, path) in files {
let current_hash = file_hashes[file_id];
if let Some(cached_hash) = self.cache.get(path) {
if *cached_hash == current_hash {
results.insert(*file_id, ChangeType::None);
} else {
results.insert(
*file_id,
ChangeType::Modified {
old: *cached_hash,
new: current_hash,
},
);
self.cache.insert(path.to_path_buf(), current_hash);
}
} else {
cache_misses.push(*file_id);
}
}
if !cache_misses.is_empty() {
let db_hashes = get_hashes_batch_from_db(&self.store, &cache_misses).await?;
for file_id in cache_misses {
let current_hash = file_hashes[&file_id];
let path = &files.iter().find(|(id, _)| *id == file_id).unwrap().1;
let change_type = match db_hashes.get(&file_id) {
Some(old_hash) => {
if *old_hash == current_hash {
ChangeType::None
} else {
ChangeType::Modified {
old: *old_hash,
new: current_hash,
}
}
}
None => ChangeType::New(current_hash),
};
results.insert(file_id, change_type);
self.cache.insert(path.to_path_buf(), current_hash);
}
}
Ok(files
.iter()
.map(|(file_id, _)| (*file_id, results[file_id].clone()))
.collect())
}
}
/// Loads the content hash stored in the `files` table for `file_id`.
///
/// Returns `Ok(None)` when no row exists for `file_id` or when the row's
/// `content_hash` column is NULL (no hash recorded yet).
///
/// # Errors
///
/// Returns an error if the query fails or the stored value is not a valid
/// hex-encoded hash.
pub async fn get_hash_from_db(store: &SqliteStore, file_id: i64) -> Result<Option<ContentHash>> {
    store
        .run(move |conn| {
            // Read the column as nullable: a row without a hash yet must not
            // surface as a column-type error. The outer Option (row missing)
            // and the inner Option (NULL column) both mean "no hash known".
            let stored: Option<String> = conn
                .query_row(
                    "SELECT content_hash FROM files WHERE id = ?1",
                    rusqlite::params![file_id],
                    |row| row.get::<_, Option<String>>(0),
                )
                .optional()?
                .flatten();

            match stored {
                Some(hex_str) => {
                    let hash = blake3::Hash::from_hex(&hex_str)
                        .map_err(|e| anyhow::anyhow!("Invalid hash in database: {}", e))?;
                    Ok(Some(hash))
                }
                None => Ok(None),
            }
        })
        .await
}
/// Persists `hash` (hex-encoded) as the `content_hash` of the `files` row
/// identified by `file_id`.
///
/// The statement is an `UPDATE`; the number of affected rows is not
/// inspected, so a `file_id` without a matching row still yields `Ok(())`.
///
/// # Errors
///
/// Returns an error if executing the statement fails.
pub async fn store_hash_in_db(store: &SqliteStore, file_id: i64, hash: ContentHash) -> Result<()> {
    let encoded_hash = hash.to_hex().to_string();
    store
        .run(move |conn| {
            let _rows_updated = conn.execute(
                "UPDATE files SET content_hash = ?1 WHERE id = ?2",
                rusqlite::params![encoded_hash, file_id],
            )?;
            Ok(())
        })
        .await
}
/// Loads the stored content hashes for many file ids at once.
///
/// The returned map contains an entry only for ids that have a row with a
/// non-NULL, parseable `content_hash`. Ids without a row, with a NULL hash,
/// or with a value that is not valid hex are simply absent from the map.
///
/// The ids are queried in chunks so that no single statement binds more
/// parameters than SQLite allows, which lets callers pass arbitrarily large
/// batches.
///
/// # Errors
///
/// Returns an error if preparing or running a query fails.
pub async fn get_hashes_batch_from_db(
    store: &SqliteStore,
    file_ids: &[i64],
) -> Result<std::collections::HashMap<i64, ContentHash>> {
    use std::collections::HashMap;

    // Stays below SQLite's historical default of 999 bound parameters per
    // statement (SQLITE_MAX_VARIABLE_NUMBER before version 3.32.0).
    const MAX_IDS_PER_QUERY: usize = 900;

    if file_ids.is_empty() {
        return Ok(HashMap::new());
    }

    let file_ids = file_ids.to_vec();
    store
        .run(move |conn| {
            let mut hashes = HashMap::with_capacity(file_ids.len());

            for chunk in file_ids.chunks(MAX_IDS_PER_QUERY) {
                // One positional placeholder per id in this chunk.
                let placeholders = vec!["?"; chunk.len()].join(",");
                let sql = format!(
                    "SELECT id, content_hash FROM files WHERE id IN ({})",
                    placeholders
                );
                let mut stmt = conn.prepare(&sql)?;
                let params: Vec<&dyn rusqlite::ToSql> = chunk
                    .iter()
                    .map(|id| id as &dyn rusqlite::ToSql)
                    .collect();
                let rows = stmt.query_map(params.as_slice(), |row| {
                    let id: i64 = row.get(0)?;
                    let hex_str: Option<String> = row.get(1)?;
                    Ok((id, hex_str))
                })?;

                for row_result in rows {
                    let (id, hex_str) = row_result?;
                    // NULL or unparseable hashes are skipped on purpose: the
                    // id then counts as having no known hash.
                    if let Some(hash) =
                        hex_str.and_then(|hex| blake3::Hash::from_hex(&hex).ok())
                    {
                        hashes.insert(id, hash);
                    }
                }
            }

            Ok(hashes)
        })
        .await
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Shorthand for building a `ChangeType::Modified`.
    fn modified(old: ContentHash, new: ContentHash) -> ChangeType {
        ChangeType::Modified { old, new }
    }

    /// Creates a temporary file holding `contents`, flushed to disk.
    fn temp_file_with(contents: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents).unwrap();
        file.flush().unwrap();
        file
    }

    #[test]
    fn test_change_type_equality() {
        let first = FileHasher::hash_bytes(b"content1");
        let second = FileHasher::hash_bytes(b"content2");

        // Same variant, same payload: equal.
        assert_eq!(ChangeType::None, ChangeType::None);
        assert_eq!(ChangeType::New(first), ChangeType::New(first));
        assert_eq!(modified(first, second), modified(first, second));
        assert_eq!(ChangeType::Deleted(first), ChangeType::Deleted(first));

        // Same variant, different payload: not equal.
        assert_ne!(ChangeType::New(first), ChangeType::New(second));
        assert_ne!(modified(first, second), modified(second, first));
        assert_ne!(ChangeType::Deleted(first), ChangeType::Deleted(second));

        // Different variants never compare equal, even with shared hashes.
        assert_ne!(ChangeType::None, ChangeType::New(first));
        assert_ne!(ChangeType::New(first), modified(first, second));
        assert_ne!(ChangeType::New(first), ChangeType::Deleted(first));
        assert_ne!(modified(first, second), ChangeType::Deleted(first));
    }

    #[test]
    fn test_change_type_clone() {
        let original = modified(
            FileHasher::hash_bytes(b"content1"),
            FileHasher::hash_bytes(b"content2"),
        );
        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }

    #[test]
    fn test_detector_new() {
        // Placeholder: currently makes no assertions.
    }

    #[test]
    fn test_hash_file_for_change_detection() {
        let file_a = temp_file_with(b"content 1");
        let file_b = temp_file_with(b"content 2");

        let hash_a = FileHasher::hash_file(file_a.path()).unwrap();
        let hash_b = FileHasher::hash_file(file_b.path()).unwrap();
        // Different contents must hash differently.
        assert_ne!(hash_a, hash_b);

        // Hashing the same file twice must be stable.
        let hash_a_again = FileHasher::hash_file(file_a.path()).unwrap();
        assert_eq!(hash_a, hash_a_again);
    }
}