use anyhow::{Context, Result};
use sha2::{Digest, Sha256};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
/// One record in the cache index, describing a cache file built for a dump.
///
/// Entries are created by `CacheManager::update_index` and persisted as JSON
/// through `serde`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CacheEntry {
/// Display form of the dump path that was passed in when the entry was recorded.
pub dump_path: String,
/// Key identifying the cache file; the file lives at `<cache_dir>/<cache_key>.duckdb`.
pub cache_key: String,
/// Size of the dump file in bytes when the entry was recorded.
pub dump_size: u64,
/// Dump modification time in whole seconds since the Unix epoch (0 if unavailable).
pub dump_mtime: u64,
/// Size of the cache file in bytes when the entry was recorded (0 if it could not be read).
pub cache_size: u64,
/// Time the entry was recorded, in whole seconds since the Unix epoch.
pub created_at: u64,
/// Caller-supplied table count (presumably the tables imported from the dump — confirm with caller).
pub table_count: usize,
/// Caller-supplied row count (presumably the rows imported from the dump — confirm with caller).
pub row_count: u64,
}
/// The full set of known cache entries, serialized to `index.json` inside the
/// cache directory.
///
/// `Default` yields an empty index, which is what `CacheManager::load_index`
/// returns when no index file exists yet.
#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
pub struct CacheIndex {
/// All recorded cache entries, in insertion order.
pub entries: Vec<CacheEntry>,
}
/// Manages a directory of cache files and the JSON index describing them.
///
/// Derives `Debug` so the manager can be logged or embedded in other
/// `Debug` types, as is expected of public types.
#[derive(Debug)]
pub struct CacheManager {
    /// Directory that holds the `*.duckdb` cache files and `index.json`.
    cache_dir: PathBuf,
}
impl CacheManager {
pub fn new() -> Result<Self> {
let cache_dir = Self::default_cache_dir()?;
fs::create_dir_all(&cache_dir).context("Failed to create cache directory")?;
Ok(Self { cache_dir })
}
pub fn with_dir(cache_dir: PathBuf) -> Result<Self> {
fs::create_dir_all(&cache_dir).context("Failed to create cache directory")?;
Ok(Self { cache_dir })
}
pub fn default_cache_dir() -> Result<PathBuf> {
let cache_base = dirs::cache_dir()
.or_else(|| dirs::home_dir().map(|h| h.join(".cache")))
.context("Could not determine cache directory")?;
Ok(cache_base.join("sql-splitter").join("duckdb"))
}
pub fn compute_cache_key(dump_path: &Path) -> Result<String> {
let canonical = dump_path
.canonicalize()
.with_context(|| format!("Failed to canonicalize path: {}", dump_path.display()))?;
let metadata = fs::metadata(&canonical)
.with_context(|| format!("Failed to read metadata: {}", dump_path.display()))?;
let mtime = metadata
.modified()
.unwrap_or(SystemTime::UNIX_EPOCH)
.duration_since(SystemTime::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
let key_input = format!("{}:{}:{}", canonical.display(), metadata.len(), mtime);
let mut hasher = Sha256::new();
hasher.update(key_input.as_bytes());
let hash = hasher.finalize();
Ok(hex::encode(&hash[..16])) }
pub fn cache_path(&self, cache_key: &str) -> PathBuf {
self.cache_dir.join(format!("{}.duckdb", cache_key))
}
pub fn has_valid_cache(&self, dump_path: &Path) -> Result<bool> {
let cache_key = Self::compute_cache_key(dump_path)?;
let cache_path = self.cache_path(&cache_key);
if !cache_path.exists() {
return Ok(false);
}
let dump_mtime = fs::metadata(dump_path)?
.modified()
.unwrap_or(SystemTime::UNIX_EPOCH);
let cache_mtime = fs::metadata(&cache_path)?
.modified()
.unwrap_or(SystemTime::UNIX_EPOCH);
Ok(cache_mtime > dump_mtime)
}
pub fn get_cache(&self, dump_path: &Path) -> Result<Option<PathBuf>> {
if self.has_valid_cache(dump_path)? {
let cache_key = Self::compute_cache_key(dump_path)?;
Ok(Some(self.cache_path(&cache_key)))
} else {
Ok(None)
}
}
pub fn create_cache(
&self,
dump_path: &Path,
table_count: usize,
row_count: u64,
) -> Result<PathBuf> {
let cache_key = Self::compute_cache_key(dump_path)?;
let cache_path = self.cache_path(&cache_key);
self.update_index(dump_path, &cache_key, table_count, row_count)?;
Ok(cache_path)
}
fn update_index(
&self,
dump_path: &Path,
cache_key: &str,
table_count: usize,
row_count: u64,
) -> Result<()> {
let mut index = self.load_index()?;
let metadata = fs::metadata(dump_path)?;
let dump_mtime = metadata
.modified()
.unwrap_or(SystemTime::UNIX_EPOCH)
.duration_since(SystemTime::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
let cache_path = self.cache_path(cache_key);
let cache_size = fs::metadata(&cache_path).map(|m| m.len()).unwrap_or(0);
let entry = CacheEntry {
dump_path: dump_path.display().to_string(),
cache_key: cache_key.to_string(),
dump_size: metadata.len(),
dump_mtime,
cache_size,
created_at: SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0),
table_count,
row_count,
};
index
.entries
.retain(|e| e.dump_path != dump_path.display().to_string());
index.entries.push(entry);
self.save_index(&index)?;
Ok(())
}
pub fn load_index(&self) -> Result<CacheIndex> {
let index_path = self.cache_dir.join("index.json");
if !index_path.exists() {
return Ok(CacheIndex::default());
}
let content = fs::read_to_string(&index_path).context("Failed to read cache index")?;
serde_json::from_str(&content).context("Failed to parse cache index")
}
fn save_index(&self, index: &CacheIndex) -> Result<()> {
let index_path = self.cache_dir.join("index.json");
let content =
serde_json::to_string_pretty(index).context("Failed to serialize cache index")?;
fs::write(&index_path, content).context("Failed to write cache index")?;
Ok(())
}
pub fn list_entries(&self) -> Result<Vec<CacheEntry>> {
let index = self.load_index()?;
Ok(index.entries)
}
pub fn remove_cache(&self, cache_key: &str) -> Result<()> {
let cache_path = self.cache_path(cache_key);
if cache_path.exists() {
fs::remove_file(&cache_path).context("Failed to remove cache file")?;
}
let wal_path = cache_path.with_extension("duckdb.wal");
if wal_path.exists() {
fs::remove_file(&wal_path)?;
}
let mut index = self.load_index()?;
index.entries.retain(|e| e.cache_key != cache_key);
self.save_index(&index)?;
Ok(())
}
pub fn clear_all(&self) -> Result<usize> {
let entries = self.list_entries()?;
let count = entries.len();
for entry in entries {
self.remove_cache(&entry.cache_key)?;
}
Ok(count)
}
pub fn total_size(&self) -> Result<u64> {
let entries = self.list_entries()?;
Ok(entries.iter().map(|e| e.cache_size).sum())
}
pub fn cache_dir(&self) -> &Path {
&self.cache_dir
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a manager rooted in a fresh temporary directory. The `TempDir`
    /// is returned so it outlives the test body.
    fn setup_test_cache() -> (CacheManager, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let cache_manager = CacheManager::with_dir(temp_dir.path().to_path_buf()).unwrap();
        (cache_manager, temp_dir)
    }

    /// Writes `sql` to `name` inside `dir` and returns the file's path.
    fn write_dump(dir: &TempDir, name: &str, sql: &str) -> PathBuf {
        let path = dir.path().join(name);
        fs::write(&path, sql).unwrap();
        path
    }

    #[test]
    fn test_cache_key_computation() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = write_dump(&temp_dir, "test.sql", "SELECT 1;");

        let key1 = CacheManager::compute_cache_key(&test_file).unwrap();
        let key2 = CacheManager::compute_cache_key(&test_file).unwrap();

        // Same file, same metadata -> same key; 16 bytes hex-encoded -> 32 chars.
        assert_eq!(key1, key2);
        assert_eq!(key1.len(), 32);
    }

    #[test]
    fn test_cache_key_changes_with_content() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = write_dump(&temp_dir, "test.sql", "SELECT 1;");
        let key1 = CacheManager::compute_cache_key(&test_file).unwrap();

        // The new content has a different length, so the key changes even if
        // the mtime lands in the same second.
        fs::write(&test_file, "SELECT 2; -- with extra content to change size").unwrap();
        let key2 = CacheManager::compute_cache_key(&test_file).unwrap();

        assert_ne!(key1, key2);
    }

    #[test]
    fn test_cache_path() {
        let (cache_manager, _temp_dir) = setup_test_cache();
        let cache_path = cache_manager.cache_path("abc123");
        assert!(cache_path.to_string_lossy().ends_with("abc123.duckdb"));
    }

    #[test]
    fn test_has_valid_cache_when_missing() {
        let (cache_manager, temp_dir) = setup_test_cache();
        let test_file = write_dump(&temp_dir, "test.sql", "SELECT 1;");
        assert!(!cache_manager.has_valid_cache(&test_file).unwrap());
        assert!(cache_manager.get_cache(&test_file).unwrap().is_none());
    }

    #[test]
    fn test_list_entries_empty() {
        let (cache_manager, _temp_dir) = setup_test_cache();
        let entries = cache_manager.list_entries().unwrap();
        assert!(entries.is_empty());
    }

    #[test]
    fn test_total_size_empty() {
        let (cache_manager, _temp_dir) = setup_test_cache();
        assert_eq!(cache_manager.total_size().unwrap(), 0);
    }

    #[test]
    fn test_create_cache_records_single_entry() {
        let (cache_manager, temp_dir) = setup_test_cache();
        let dump = write_dump(&temp_dir, "dump.sql", "SELECT 1;");

        let cache_path = cache_manager.create_cache(&dump, 3, 42).unwrap();
        let entries = cache_manager.list_entries().unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].table_count, 3);
        assert_eq!(entries[0].row_count, 42);
        assert_eq!(
            entries[0].cache_key,
            CacheManager::compute_cache_key(&dump).unwrap()
        );
        assert_eq!(cache_manager.cache_path(&entries[0].cache_key), cache_path);

        // Registering the same dump again replaces the entry instead of
        // appending a second one.
        cache_manager.create_cache(&dump, 4, 50).unwrap();
        let entries = cache_manager.list_entries().unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].table_count, 4);
        assert_eq!(entries[0].row_count, 50);
    }

    #[test]
    fn test_total_size_uses_recorded_cache_size() {
        let (cache_manager, temp_dir) = setup_test_cache();
        let dump = write_dump(&temp_dir, "dump.sql", "SELECT 1;");

        // Put a cache file in place before registering so its size is recorded.
        let key = CacheManager::compute_cache_key(&dump).unwrap();
        fs::write(cache_manager.cache_path(&key), b"12345").unwrap();
        cache_manager.create_cache(&dump, 1, 1).unwrap();

        assert_eq!(cache_manager.total_size().unwrap(), 5);
    }

    #[test]
    fn test_remove_cache_deletes_file_and_entry() {
        let (cache_manager, temp_dir) = setup_test_cache();
        let dump = write_dump(&temp_dir, "dump.sql", "SELECT 1;");

        let cache_path = cache_manager.create_cache(&dump, 1, 1).unwrap();
        fs::write(&cache_path, b"db").unwrap();
        let key = CacheManager::compute_cache_key(&dump).unwrap();

        cache_manager.remove_cache(&key).unwrap();
        assert!(!cache_path.exists());
        assert!(cache_manager.list_entries().unwrap().is_empty());

        // Removing a cache that is already gone is not an error.
        cache_manager.remove_cache(&key).unwrap();
    }

    #[test]
    fn test_clear_all_removes_everything() {
        let (cache_manager, temp_dir) = setup_test_cache();
        let first = write_dump(&temp_dir, "first.sql", "SELECT 1;");
        let second = write_dump(&temp_dir, "second.sql", "SELECT 2;");

        let first_cache = cache_manager.create_cache(&first, 1, 1).unwrap();
        let second_cache = cache_manager.create_cache(&second, 2, 2).unwrap();
        fs::write(&first_cache, b"a").unwrap();
        fs::write(&second_cache, b"b").unwrap();

        assert_eq!(cache_manager.clear_all().unwrap(), 2);
        assert!(!first_cache.exists());
        assert!(!second_cache.exists());
        assert!(cache_manager.list_entries().unwrap().is_empty());
    }
}