sql_splitter/duckdb/
cache.rs1use anyhow::{Context, Result};
6use sha2::{Digest, Sha256};
7use std::fs;
8use std::path::{Path, PathBuf};
9use std::time::SystemTime;
10
11#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
13pub struct CacheEntry {
14 pub dump_path: String,
16 pub cache_key: String,
18 pub dump_size: u64,
20 pub dump_mtime: u64,
22 pub cache_size: u64,
24 pub created_at: u64,
26 pub table_count: usize,
28 pub row_count: u64,
30}
31
32#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
34pub struct CacheIndex {
35 pub entries: Vec<CacheEntry>,
36}
37
/// Manages cached DuckDB database files and their JSON index inside a single
/// cache directory.
#[derive(Debug, Clone)]
pub struct CacheManager {
    /// Directory that holds the `*.duckdb` cache files and the index file.
    cache_dir: PathBuf,
}
42
43impl CacheManager {
44 pub fn new() -> Result<Self> {
46 let cache_dir = Self::default_cache_dir()?;
47 fs::create_dir_all(&cache_dir).context("Failed to create cache directory")?;
48 Ok(Self { cache_dir })
49 }
50
51 pub fn with_dir(cache_dir: PathBuf) -> Result<Self> {
53 fs::create_dir_all(&cache_dir).context("Failed to create cache directory")?;
54 Ok(Self { cache_dir })
55 }
56
57 pub fn default_cache_dir() -> Result<PathBuf> {
59 let cache_base = dirs::cache_dir()
60 .or_else(|| dirs::home_dir().map(|h| h.join(".cache")))
61 .context("Could not determine cache directory")?;
62
63 Ok(cache_base.join("sql-splitter").join("duckdb"))
64 }
65
66 pub fn compute_cache_key(dump_path: &Path) -> Result<String> {
68 let canonical = dump_path
69 .canonicalize()
70 .with_context(|| format!("Failed to canonicalize path: {}", dump_path.display()))?;
71
72 let metadata = fs::metadata(&canonical)
73 .with_context(|| format!("Failed to read metadata: {}", dump_path.display()))?;
74
75 let mtime = metadata
76 .modified()
77 .unwrap_or(SystemTime::UNIX_EPOCH)
78 .duration_since(SystemTime::UNIX_EPOCH)
79 .map(|d| d.as_secs())
80 .unwrap_or(0);
81
82 let key_input = format!("{}:{}:{}", canonical.display(), metadata.len(), mtime);
83
84 let mut hasher = Sha256::new();
85 hasher.update(key_input.as_bytes());
86 let hash = hasher.finalize();
87
88 Ok(hex::encode(&hash[..16])) }
90
91 pub fn cache_path(&self, cache_key: &str) -> PathBuf {
93 self.cache_dir.join(format!("{}.duckdb", cache_key))
94 }
95
96 pub fn has_valid_cache(&self, dump_path: &Path) -> Result<bool> {
98 let cache_key = Self::compute_cache_key(dump_path)?;
99 let cache_path = self.cache_path(&cache_key);
100
101 if !cache_path.exists() {
102 return Ok(false);
103 }
104
105 let dump_mtime = fs::metadata(dump_path)?
107 .modified()
108 .unwrap_or(SystemTime::UNIX_EPOCH);
109 let cache_mtime = fs::metadata(&cache_path)?
110 .modified()
111 .unwrap_or(SystemTime::UNIX_EPOCH);
112
113 Ok(cache_mtime > dump_mtime)
114 }
115
116 pub fn get_cache(&self, dump_path: &Path) -> Result<Option<PathBuf>> {
118 if self.has_valid_cache(dump_path)? {
119 let cache_key = Self::compute_cache_key(dump_path)?;
120 Ok(Some(self.cache_path(&cache_key)))
121 } else {
122 Ok(None)
123 }
124 }
125
126 pub fn create_cache(
128 &self,
129 dump_path: &Path,
130 table_count: usize,
131 row_count: u64,
132 ) -> Result<PathBuf> {
133 let cache_key = Self::compute_cache_key(dump_path)?;
134 let cache_path = self.cache_path(&cache_key);
135
136 self.update_index(dump_path, &cache_key, table_count, row_count)?;
138
139 Ok(cache_path)
140 }
141
142 fn update_index(
144 &self,
145 dump_path: &Path,
146 cache_key: &str,
147 table_count: usize,
148 row_count: u64,
149 ) -> Result<()> {
150 let mut index = self.load_index()?;
151
152 let metadata = fs::metadata(dump_path)?;
153 let dump_mtime = metadata
154 .modified()
155 .unwrap_or(SystemTime::UNIX_EPOCH)
156 .duration_since(SystemTime::UNIX_EPOCH)
157 .map(|d| d.as_secs())
158 .unwrap_or(0);
159
160 let cache_path = self.cache_path(cache_key);
161 let cache_size = fs::metadata(&cache_path).map(|m| m.len()).unwrap_or(0);
162
163 let entry = CacheEntry {
164 dump_path: dump_path.display().to_string(),
165 cache_key: cache_key.to_string(),
166 dump_size: metadata.len(),
167 dump_mtime,
168 cache_size,
169 created_at: SystemTime::now()
170 .duration_since(SystemTime::UNIX_EPOCH)
171 .map(|d| d.as_secs())
172 .unwrap_or(0),
173 table_count,
174 row_count,
175 };
176
177 index
179 .entries
180 .retain(|e| e.dump_path != dump_path.display().to_string());
181 index.entries.push(entry);
182
183 self.save_index(&index)?;
184 Ok(())
185 }
186
187 pub fn load_index(&self) -> Result<CacheIndex> {
189 let index_path = self.cache_dir.join("index.json");
190
191 if !index_path.exists() {
192 return Ok(CacheIndex::default());
193 }
194
195 let content = fs::read_to_string(&index_path).context("Failed to read cache index")?;
196 serde_json::from_str(&content).context("Failed to parse cache index")
197 }
198
199 fn save_index(&self, index: &CacheIndex) -> Result<()> {
201 let index_path = self.cache_dir.join("index.json");
202 let content =
203 serde_json::to_string_pretty(index).context("Failed to serialize cache index")?;
204 fs::write(&index_path, content).context("Failed to write cache index")?;
205 Ok(())
206 }
207
208 pub fn list_entries(&self) -> Result<Vec<CacheEntry>> {
210 let index = self.load_index()?;
211 Ok(index.entries)
212 }
213
214 pub fn remove_cache(&self, cache_key: &str) -> Result<()> {
216 let cache_path = self.cache_path(cache_key);
217
218 if cache_path.exists() {
219 fs::remove_file(&cache_path).context("Failed to remove cache file")?;
220 }
221
222 let wal_path = cache_path.with_extension("duckdb.wal");
224 if wal_path.exists() {
225 fs::remove_file(&wal_path)?;
226 }
227
228 let mut index = self.load_index()?;
230 index.entries.retain(|e| e.cache_key != cache_key);
231 self.save_index(&index)?;
232
233 Ok(())
234 }
235
236 pub fn clear_all(&self) -> Result<usize> {
238 let entries = self.list_entries()?;
239 let count = entries.len();
240
241 for entry in entries {
242 self.remove_cache(&entry.cache_key)?;
243 }
244
245 Ok(count)
246 }
247
248 pub fn total_size(&self) -> Result<u64> {
250 let entries = self.list_entries()?;
251 Ok(entries.iter().map(|e| e.cache_size).sum())
252 }
253
254 pub fn cache_dir(&self) -> &Path {
256 &self.cache_dir
257 }
258}
259
260impl Default for CacheManager {
261 fn default() -> Self {
262 Self::new().expect("Failed to create cache manager")
263 }
264}
265
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a manager rooted in a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned as well so the directory stays alive for
    /// the duration of the test.
    fn setup_test_cache() -> (CacheManager, TempDir) {
        let root = TempDir::new().unwrap();
        let manager = CacheManager::with_dir(root.path().to_path_buf()).unwrap();
        (manager, root)
    }

    #[test]
    fn test_cache_key_computation() {
        let dir = TempDir::new().unwrap();
        let dump = dir.path().join("test.sql");
        fs::write(&dump, "SELECT 1;").unwrap();

        let first = CacheManager::compute_cache_key(&dump).unwrap();
        let second = CacheManager::compute_cache_key(&dump).unwrap();

        // Same file, same metadata: the key must be stable.
        assert_eq!(first, second);
        // 16 digest bytes, hex-encoded.
        assert_eq!(first.len(), 32);
    }

    #[test]
    fn test_cache_key_changes_with_content() {
        let dir = TempDir::new().unwrap();
        let dump = dir.path().join("test.sql");

        fs::write(&dump, "SELECT 1;").unwrap();
        let before = CacheManager::compute_cache_key(&dump).unwrap();

        // The new content has a different length, so the key changes even if the
        // modification time (whole seconds) does not.
        fs::write(&dump, "SELECT 2; -- with extra content to change size").unwrap();
        let after = CacheManager::compute_cache_key(&dump).unwrap();

        assert_ne!(before, after);
    }

    #[test]
    fn test_cache_path() {
        let (manager, _guard) = setup_test_cache();
        let path = manager.cache_path("abc123");
        assert!(path.to_string_lossy().ends_with("abc123.duckdb"));
    }

    #[test]
    fn test_has_valid_cache_when_missing() {
        let (manager, guard) = setup_test_cache();
        let dump = guard.path().join("test.sql");
        fs::write(&dump, "SELECT 1;").unwrap();

        // No cache file has been written for this dump.
        let valid = manager.has_valid_cache(&dump).unwrap();
        assert!(!valid);
    }

    #[test]
    fn test_list_entries_empty() {
        let (manager, _guard) = setup_test_cache();
        let listed = manager.list_entries().unwrap();
        assert!(listed.is_empty());
    }

    #[test]
    fn test_total_size_empty() {
        let (manager, _guard) = setup_test_cache();
        let total = manager.total_size().unwrap();
        assert_eq!(total, 0);
    }
}