Skip to main content

sqry_core/cache/
persist.rs

1//! Disk persistence for cache entries.
2//!
3//! This module provides atomic disk persistence with crash safety and multi-process
4//! coordination. Cache entries are stored in `.sqry-cache/` with the following layout:
5//!
6//! ```text
7//! .sqry-cache/
8//! ├── <user_namespace_id>/               # Per-user namespace (hash of $USER)
9//! │   ├── rust/                          # Language-specific directory
10//! │   │   ├── <content_hash>/            # BLAKE3 of file content (64 hex chars)
11//! │   │   │   ├── <path_hash>/           # BLAKE3 of canonical path (16 hex chars)
12//! │   │   │   │   ├── filename.rs.bin       # Cached symbols
13//! │   │   │   │   └── filename.rs.bin.lock  # Write lock
14//! │   ├── python/
15//! │   ├── typescript/
16//! │   └── manifest.json                   # Size tracking (Phase 3)
17//! ```
18//!
19//! # Features
20//!
21//! - **Atomic writes**: Temp file + `fs::rename` for crash safety
22//! - **Multi-process safe**: Per-entry lock files prevent corruption
23//! - **Stale lock cleanup**: Removes locks from dead processes
//! - **Size tracking** (planned, Phase 3): a manifest will track total bytes per language; the type is defined but not yet used
25//!
26//! # Examples
27//!
28//! ```rust,ignore
29//! use sqry_core::cache::persist::PersistManager;
30//!
31//! let manager = PersistManager::new("/path/to/.sqry-cache")?;
32//!
33//! // Write entry
34//! manager.write_entry(&key, &summaries)?;
35//!
36//! // Read entry
37//! if let Some(summaries) = manager.read_entry(&key)? {
38//!     // Use cached data
39//! }
40//! ```
41
42use crate::cache::{CacheKey, GraphNodeSummary};
43use anyhow::{Context, Result};
44use serde::{Deserialize, Serialize};
45use std::collections::HashMap;
46use std::fs;
47use std::io::Write;
48use std::path::{Path, PathBuf};
49use std::time::{Duration, SystemTime};
50
/// Cache manifest for tracking size and metadata.
///
/// **Status**: Defined but not yet used. Full implementation planned for Phase 3
/// (manifest management, disk size tracking, and enforcement).
///
/// When implemented, the manifest will be stored as `manifest.json` in the cache
/// root directory and used to:
/// - Track total disk usage per language
/// - Enforce disk size limits with LRU eviction
/// - Provide cache statistics for CLI tooling
///
/// The type derives `Serialize`/`Deserialize`, so its field names and field order
/// are part of any persisted representation; change them with care.
///
/// See [`docs/development/cache/PHASE_2_COMPLETE.md`](../../../docs/development/cache/PHASE_2_COMPLETE.md)
/// for Phase 3 roadmap.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheManifest {
    /// Total bytes used per language.
    ///
    /// NOTE(review): keys are presumably the language directory names used in the
    /// cache layout (e.g. `rust`) - confirm once the manifest is wired up.
    pub bytes_by_language: HashMap<String, u64>,

    /// sqry version that wrote this manifest
    pub sqry_version: String,

    /// Last updated timestamp
    pub updated_at: SystemTime,
}
75
76impl Default for CacheManifest {
77    fn default() -> Self {
78        Self {
79            bytes_by_language: HashMap::new(),
80            sqry_version: env!("CARGO_PKG_VERSION").to_string(),
81            updated_at: SystemTime::now(),
82        }
83    }
84}
85
/// Persistence manager for cache entries.
///
/// Handles atomic writes, lock management, and manifest tracking.
///
/// All entries written through a manager live under
/// `<cache_root>/<user_namespace_id>/`, so managers created by different users on
/// the same `cache_root` operate on disjoint subdirectories.
#[derive(Debug)]
pub struct PersistManager {
    /// Root cache directory (e.g., `.sqry-cache/`)
    cache_root: PathBuf,

    /// User-specific namespace hash (subdirectory under `cache_root`)
    user_namespace_id: String,
}
96
97impl PersistManager {
98    /// Create a new persistence manager.
99    ///
100    /// # Arguments
101    ///
102    /// - `cache_root`: Root directory for cache storage
103    ///
104    /// # Examples
105    ///
106    /// ```rust,ignore
107    /// let manager = PersistManager::new(".sqry-cache")?;
108    /// ```
109    ///
110    /// # Errors
111    ///
112    /// Returns [`anyhow::Error`] when the cache directories cannot be created or when stale
113    /// lock cleanup fails.
114    pub fn new<P: AsRef<Path>>(cache_root: P) -> Result<Self> {
115        let cache_root = cache_root.as_ref().to_path_buf();
116
117        // Create cache directory if it doesn't exist
118        fs::create_dir_all(&cache_root).with_context(|| {
119            format!("Failed to create cache directory: {}", cache_root.display())
120        })?;
121
122        // Generate user-specific namespace
123        let user_namespace_id = Self::compute_user_hash();
124
125        let manager = Self {
126            cache_root,
127            user_namespace_id,
128        };
129
130        // Clean up stale locks on initialization
131        manager.cleanup_stale_locks()?;
132
133        Ok(manager)
134    }
135
136    /// Compute a hash of the current user for namespacing.
137    ///
138    /// Uses the current username to create a subdirectory under cache root.
139    /// This prevents collisions when multiple users share the same filesystem.
140    fn compute_user_hash() -> String {
141        use std::collections::hash_map::DefaultHasher;
142        use std::hash::{Hash, Hasher};
143
144        let username = std::env::var("USER")
145            .or_else(|_| std::env::var("USERNAME"))
146            .unwrap_or_else(|_| "default".to_string());
147
148        let mut hasher = DefaultHasher::new();
149        username.hash(&mut hasher);
150        format!("{:x}", hasher.finish())
151    }
152
153    /// Get the user-specific cache directory.
154    ///
155    /// Returns the path where this user's cache entries are stored.
156    #[must_use]
157    pub fn user_cache_dir(&self) -> PathBuf {
158        self.cache_root.join(&self.user_namespace_id)
159    }
160
161    /// Get the file path for a cache entry.
162    ///
163    /// Format: `<user_namespace_id>/<language>/<hash>/<filename>.bin`
164    fn entry_path(&self, key: &CacheKey) -> PathBuf {
165        let storage_key = key.storage_key();
166        self.user_cache_dir().join(format!("{storage_key}.bin"))
167    }
168
169    /// Get the lock file path for a cache entry.
170    fn lock_path(&self, key: &CacheKey) -> PathBuf {
171        let mut path = self.entry_path(key);
172        path.set_extension("bin.lock");
173        path
174    }
175
176    /// Write a cache entry to disk atomically.
177    ///
178    /// Uses temp file + rename pattern for crash safety.
179    ///
180    /// # Arguments
181    ///
182    /// - `key`: Cache key identifying the entry
183    /// - `summaries`: Node summaries to persist
184    ///
185    /// # Returns
186    ///
187    /// `Ok(bytes_written)` on success
188    ///
189    /// # Errors
190    ///
191    /// Returns [`anyhow::Error`] when locking, serialization, writing, or atomic rename steps fail.
192    pub fn write_entry(&self, key: &CacheKey, summaries: &[GraphNodeSummary]) -> Result<usize> {
193        let entry_path = self.entry_path(key);
194        let lock_path = self.lock_path(key);
195
196        // Create parent directories
197        if let Some(parent) = entry_path.parent() {
198            fs::create_dir_all(parent)?;
199        }
200
201        // Acquire lock
202        let _lock_guard = Self::acquire_lock(&lock_path)?;
203
204        // Serialize to postcard
205        let data = postcard::to_allocvec(summaries).context("Failed to serialize cache entry")?;
206
207        // Write to temporary file
208        let tmp_cache_file_path = entry_path.with_extension("tmp");
209        {
210            let mut temp_file = fs::File::create(&tmp_cache_file_path).with_context(|| {
211                format!(
212                    "Failed to create temp file: {}",
213                    tmp_cache_file_path.display()
214                )
215            })?;
216
217            temp_file.write_all(&data)?;
218            temp_file.sync_all()?; // Ensure data is flushed
219        } // Close file handle before rename (required on Windows)
220
221        // On Windows, remove destination file first if it exists
222        // (Windows doesn't allow renaming over existing files with open handles)
223        #[cfg(windows)]
224        if entry_path.exists() {
225            fs::remove_file(&entry_path).with_context(|| {
226                format!("Failed to remove existing file: {}", entry_path.display())
227            })?;
228        }
229
230        // Atomic rename
231        match fs::rename(&tmp_cache_file_path, &entry_path) {
232            Ok(()) => {
233                log::debug!(
234                    "Wrote cache entry: {} ({} bytes)",
235                    entry_path.display(),
236                    data.len()
237                );
238                Ok(data.len())
239            }
240            Err(e) => {
241                // Clean up temp file on failure
242                let _ = fs::remove_file(&tmp_cache_file_path);
243                Err(e).with_context(|| {
244                    format!(
245                        "Failed to rename {} to {}",
246                        tmp_cache_file_path.display(),
247                        entry_path.display()
248                    )
249                })
250            }
251        }
252    }
253
254    /// Read a cache entry from disk.
255    ///
256    /// # Returns
257    ///
258    /// - `Ok(Some(summaries))` if entry exists and is valid
259    /// - `Ok(None)` if entry doesn't exist
260    /// - `Err(_)` on I/O or deserialization errors
261    /// # Errors
262    ///
263    /// Returns [`anyhow::Error`] when the entry cannot be read or deserialized.
264    pub fn read_entry(&self, key: &CacheKey) -> Result<Option<Vec<GraphNodeSummary>>> {
265        let entry_path = self.entry_path(key);
266
267        if !entry_path.exists() {
268            return Ok(None);
269        }
270
271        // Size cap to prevent memory exhaustion from crafted cache entries
272        const MAX_CACHE_ENTRY_BYTES: u64 = 64 * 1024 * 1024; // 64 MiB
273        let metadata = fs::metadata(&entry_path)
274            .with_context(|| format!("Failed to stat cache entry: {}", entry_path.display()))?;
275        if metadata.len() > MAX_CACHE_ENTRY_BYTES {
276            anyhow::bail!(
277                "Cache entry is too large ({} bytes, max {}): {}",
278                metadata.len(),
279                MAX_CACHE_ENTRY_BYTES,
280                entry_path.display()
281            );
282        }
283        let data = fs::read(&entry_path)
284            .with_context(|| format!("Failed to read cache entry: {}", entry_path.display()))?;
285
286        let summaries: Vec<GraphNodeSummary> = postcard::from_bytes(&data).with_context(|| {
287            format!(
288                "Failed to deserialize cache entry: {}",
289                entry_path.display()
290            )
291        })?;
292
293        log::debug!(
294            "Read cache entry: {} ({} symbols)",
295            entry_path.display(),
296            summaries.len()
297        );
298
299        Ok(Some(summaries))
300    }
301
302    /// Delete a cache entry from disk.
303    /// # Errors
304    ///
305    /// Returns [`anyhow::Error`] when either the entry or its lock file cannot be removed.
306    pub fn delete_entry(&self, key: &CacheKey) -> Result<()> {
307        let entry_path = self.entry_path(key);
308        let lock_path = self.lock_path(key);
309
310        // Remove entry file
311        if entry_path.exists() {
312            fs::remove_file(&entry_path).with_context(|| {
313                format!("Failed to delete cache entry: {}", entry_path.display())
314            })?;
315        }
316
317        // Remove lock file if it exists
318        if lock_path.exists() {
319            let _ = fs::remove_file(&lock_path); // Best effort
320        }
321
322        Ok(())
323    }
324
325    /// Acquire a write lock for an entry.
326    ///
327    /// Creates a lock file to prevent concurrent writes from other processes.
328    /// Returns a guard that automatically removes the lock on drop.
329    fn acquire_lock(lock_path: &Path) -> Result<LockGuard> {
330        // Try to create lock file (exclusive creation)
331        // If it already exists, wait and retry
332        let max_retries = 50;
333        let retry_delay = Duration::from_millis(100);
334
335        for attempt in 0..max_retries {
336            match fs::OpenOptions::new()
337                .write(true)
338                .create_new(true)
339                .open(lock_path)
340            {
341                Ok(mut file) => {
342                    // Write our PID to the lock file
343                    let pid = std::process::id();
344                    writeln!(file, "{pid}")?;
345                    file.sync_all()?;
346
347                    return Ok(LockGuard {
348                        path: lock_path.to_path_buf(),
349                    });
350                }
351                Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
352                    // Lock exists, check if it's stale
353                    if Self::is_lock_stale(lock_path)? {
354                        // Remove stale lock and retry
355                        let _ = fs::remove_file(lock_path);
356                        continue;
357                    }
358
359                    // Lock is active, wait and retry
360                    if attempt < max_retries - 1 {
361                        std::thread::sleep(retry_delay);
362                    } else {
363                        anyhow::bail!(
364                            "Failed to acquire lock after {} attempts: {}",
365                            max_retries,
366                            lock_path.display()
367                        );
368                    }
369                }
370                Err(e) => {
371                    return Err(e).context("Failed to create lock file");
372                }
373            }
374        }
375
376        anyhow::bail!("Failed to acquire lock: {}", lock_path.display())
377    }
378
379    /// Check if a lock file is stale (process no longer exists).
380    ///
381    /// First checks if the process exists (immediate recovery from crashes).
382    /// If process exists, verifies lock age as a safety check against hung processes.
383    fn is_lock_stale(lock_path: &Path) -> Result<bool> {
384        // Try to read PID from lock file
385        let content = fs::read_to_string(lock_path)?;
386        let pid = content
387            .trim()
388            .parse::<u32>()
389            .context("Failed to parse PID from lock file")?;
390
391        // Check if process exists (immediate crash recovery)
392        if !Self::process_exists(pid) {
393            log::debug!("Process {pid} no longer exists, lock is stale");
394            return Ok(true);
395        }
396
397        // Process exists - check age as secondary safety check
398        let metadata = fs::metadata(lock_path)?;
399        let modified = metadata.modified()?;
400        let age = SystemTime::now()
401            .duration_since(modified)
402            .unwrap_or(Duration::ZERO);
403
404        // If lock is older than 5 minutes, force cleanup (hung process)
405        if age > Duration::from_secs(300) {
406            log::warn!(
407                "Lock held by PID {} for {:?} - forcing cleanup: {}",
408                pid,
409                age,
410                lock_path.display()
411            );
412            return Ok(true);
413        }
414
415        Ok(false)
416    }
417
418    /// Check if a process with the given PID exists.
419    ///
420    /// Uses platform-specific methods:
421    /// - Linux: Check /proc/<pid> directory
422    /// - macOS/BSD: Use kill(pid, 0) signal probe via nix crate
423    /// - Windows: Always returns true (no portable check available)
424    #[cfg(unix)]
425    fn process_exists(pid: u32) -> bool {
426        #[cfg(target_os = "linux")]
427        {
428            // Linux: Check /proc/<pid> directory
429            let proc_path = format!("/proc/{pid}");
430            std::path::Path::new(&proc_path).exists()
431        }
432
433        #[cfg(not(target_os = "linux"))]
434        {
435            // macOS/BSD: Use kill(pid, 0) signal probe
436            // Sending signal 0 checks if the process exists without actually sending a signal
437            use nix::sys::signal::kill;
438            use nix::unistd::Pid;
439
440            match kill(Pid::from_raw(pid as i32), None) {
441                Ok(_) => true,   // Process exists
442                Err(_) => false, // Process doesn't exist or no permission
443            }
444        }
445    }
446
447    #[cfg(not(unix))]
448    fn process_exists(_pid: u32) -> bool {
449        // On non-Unix, we can't reliably check process existence
450        // Conservative approach: assume it exists
451        true
452    }
453
454    /// Clean up stale lock files on initialization.
455    fn cleanup_stale_locks(&self) -> Result<()> {
456        let user_dir = self.user_cache_dir();
457
458        if !user_dir.exists() {
459            return Ok(()); // No cache directory yet
460        }
461
462        // Find all .lock files
463        let walker = walkdir::WalkDir::new(&user_dir)
464            .max_depth(10)
465            .into_iter()
466            .filter_map(std::result::Result::ok)
467            .filter(|e| {
468                e.path()
469                    .extension()
470                    .and_then(|ext| ext.to_str())
471                    .is_some_and(|ext| ext == "lock")
472            });
473
474        let mut cleaned = 0;
475        for entry in walker {
476            let path = entry.path();
477
478            if Self::is_lock_stale(path)? {
479                if let Err(e) = fs::remove_file(path) {
480                    log::warn!("Failed to remove stale lock {}: {}", path.display(), e);
481                } else {
482                    log::debug!("Removed stale lock: {}", path.display());
483                    cleaned += 1;
484                }
485            }
486        }
487
488        if cleaned > 0 {
489            log::info!("Cleaned up {cleaned} stale lock files");
490        }
491
492        Ok(())
493    }
494
495    /// Clear all cache entries for this user.
496    /// # Errors
497    ///
498    /// Returns [`anyhow::Error`] when directory traversal or entry deletion fails.
499    pub fn clear_all(&self) -> Result<()> {
500        let user_dir = self.user_cache_dir();
501
502        if user_dir.exists() {
503            fs::remove_dir_all(&user_dir).with_context(|| {
504                format!("Failed to remove cache directory: {}", user_dir.display())
505            })?;
506
507            log::info!("Cleared all cache entries in {}", user_dir.display());
508        }
509
510        Ok(())
511    }
512}
513
/// RAII guard for lock files.
///
/// Automatically removes the lock file when dropped, so the guard must be kept
/// alive for as long as the lock is meant to be held.
#[must_use = "the lock is released as soon as the guard is dropped"]
#[derive(Debug)]
struct LockGuard {
    /// Path of the lock file removed on drop
    path: PathBuf,
}
520
521impl Drop for LockGuard {
522    fn drop(&mut self) {
523        // Best effort removal - log error but don't panic
524        if let Err(e) = fs::remove_file(&self.path) {
525            log::warn!("Failed to remove lock file {}: {}", self.path.display(), e);
526        }
527    }
528}
529
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cache::CacheKey;
    use crate::graph::unified::node::NodeKind;
    use crate::hash::Blake3Hash;
    use std::path::{Path, PathBuf};
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Manager rooted in a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside the manager so the directory outlives
    /// the test body.
    fn fresh_manager() -> (TempDir, PersistManager) {
        let tmp_cache_dir = TempDir::new().unwrap();
        let manager = PersistManager::new(tmp_cache_dir.path()).unwrap();
        (tmp_cache_dir, manager)
    }

    /// Fixed key used by every test.
    fn make_test_key() -> CacheKey {
        let content_hash = Blake3Hash::from_bytes([42; 32]);
        let source_path = PathBuf::from("/test/file.rs");
        CacheKey::from_raw_path(source_path, "rust", content_hash)
    }

    /// Single function summary used as the cached payload.
    fn make_test_summary() -> GraphNodeSummary {
        let name = Arc::from("test_fn");
        let file = Arc::from(Path::new("test.rs"));
        GraphNodeSummary::new(name, NodeKind::Function, file, 1, 0, 1, 10)
    }

    #[test]
    fn test_persist_manager_new() {
        let (tmp_cache_dir, manager) = fresh_manager();

        // Cache root should exist
        assert!(tmp_cache_dir.path().exists());
        // User directory path should be valid (may not exist until first write)
        assert!(!manager.user_namespace_id.is_empty());
    }

    #[test]
    fn test_write_and_read_entry() {
        let (_tmp_cache_dir, manager) = fresh_manager();
        let key = make_test_key();
        let summaries = vec![make_test_summary()];

        // Round-trip: what was written must come back
        let bytes_written = manager.write_entry(&key, &summaries).unwrap();
        assert!(bytes_written > 0);

        let read_summaries = manager.read_entry(&key).unwrap().unwrap();
        assert_eq!(read_summaries.len(), 1);
        assert_eq!(read_summaries[0].name, summaries[0].name);
    }

    #[test]
    fn test_read_nonexistent_entry() {
        let (_tmp_cache_dir, manager) = fresh_manager();

        let result = manager.read_entry(&make_test_key()).unwrap();

        assert!(result.is_none());
    }

    #[test]
    fn test_delete_entry() {
        let (_tmp_cache_dir, manager) = fresh_manager();
        let key = make_test_key();
        let summaries = vec![make_test_summary()];

        // Entry is present after the write...
        manager.write_entry(&key, &summaries).unwrap();
        assert!(manager.read_entry(&key).unwrap().is_some());

        // ...and gone after the delete
        manager.delete_entry(&key).unwrap();
        assert!(manager.read_entry(&key).unwrap().is_none());
    }

    #[test]
    fn test_clear_all() {
        let (_tmp_cache_dir, manager) = fresh_manager();
        let key = make_test_key();
        let summaries = vec![make_test_summary()];

        // Entry is present after the write...
        manager.write_entry(&key, &summaries).unwrap();
        assert!(manager.read_entry(&key).unwrap().is_some());

        // ...and gone once the whole namespace is cleared
        manager.clear_all().unwrap();
        assert!(manager.read_entry(&key).unwrap().is_none());
    }
}