venus_core/salsa_db/
cache.rs

1//! Cache persistence for Salsa DB using rkyv.
2//!
3//! This module provides disk-based persistence of compilation state,
4//! enabling instant resume on startup by avoiding recompilation of
5//! unchanged cells.
6//!
7//! # Cache Structure
8//!
9//! The cache stores:
10//! - Toolchain version for validation
11//! - Dependency hash for universe invalidation
12//! - Compilation results per cell (dylib paths, source hashes)
13//!
14//! # Usage
15//!
16//! ```ignore
17//! use venus_core::salsa_db::{VenusDatabase, CachePersistence};
18//!
19//! let db = VenusDatabase::new();
20//! let cache_path = PathBuf::from(".venus/cache/salsa.bin");
21//!
22//! // Load existing cache if valid
23//! if let Some(snapshot) = CachePersistence::load(&cache_path, "nightly-2024-01-15")? {
24//!     db.restore_from_snapshot(&snapshot);
25//! }
26//!
27//! // ... work with db ...
28//!
29//! // Save cache on exit
30//! let snapshot = db.create_snapshot("nightly-2024-01-15", dep_hash);
31//! CachePersistence::save(&cache_path, &snapshot)?;
32//! ```
33
34use std::collections::HashMap;
35use std::fs;
36use std::io::{self, Read, Write};
37use std::path::Path;
38use std::time::{SystemTime, UNIX_EPOCH};
39
40use rkyv::{rancor, Archive, Deserialize, Serialize};
41
/// Version number of the on-disk cache layout.
///
/// Bump this whenever the serialized format changes incompatibly. A cache
/// file whose stored version differs from this value is rejected on load
/// with a version-mismatch error instead of being misinterpreted.
pub const CACHE_VERSION: u32 = 1;
47
48/// Snapshot of Salsa DB state that can be persisted to disk.
49///
50/// This captures the essential compilation state needed for instant resume:
51/// - Which cells have been compiled
52/// - Where their dylibs are located
53/// - What source code produced them (via hash)
54///
55/// Note: This does NOT store Salsa's internal memoization state.
56/// Salsa queries will be recomputed on load, but since compiled dylibs
57/// are preserved, compilation (the slow part) is skipped.
58#[derive(Archive, Serialize, Deserialize, Debug, Clone)]
59pub struct CacheSnapshot {
60    /// Cache format version for compatibility checking.
61    pub version: u32,
62
63    /// Rust toolchain version string (e.g., "rustc 1.76.0-nightly (abc123 2024-01-15)").
64    ///
65    /// Cache is invalidated if toolchain changes, since compiled dylibs
66    /// may have ABI incompatibilities.
67    pub toolchain_version: String,
68
69    /// Hash of external dependencies from `//! [dependencies]` block.
70    ///
71    /// If this changes, the universe dylib needs recompilation and
72    /// all cells must be recompiled against the new universe.
73    pub dependency_hash: u64,
74
75    /// Compilation results keyed by cell name.
76    pub cells: HashMap<String, CachedCell>,
77
78    /// Unix timestamp when cache was created.
79    pub created_at: u64,
80}
81
82/// Cached compilation result for a single cell.
83#[derive(Archive, Serialize, Deserialize, Debug, Clone)]
84pub struct CachedCell {
85    /// Name of the cell function.
86    pub name: String,
87
88    /// Hash of the cell's source code.
89    ///
90    /// Used to detect if the cell has changed since compilation.
91    pub source_hash: u64,
92
93    /// Path to compiled dylib (relative to cache directory).
94    ///
95    /// Empty string if compilation failed.
96    pub dylib_path: String,
97
98    /// Compilation status.
99    pub status: CachedCompilationStatus,
100}
101
102/// Cached compilation status.
103#[derive(Archive, Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
104pub enum CachedCompilationStatus {
105    /// Cell compiled successfully.
106    Success,
107
108    /// Compilation was skipped (used cached dylib).
109    Cached,
110
111    /// Compilation failed with error message.
112    Failed { error: String },
113}
114
/// Everything that can go wrong while saving, loading or deleting a cache.
#[derive(Debug)]
pub enum CacheError {
    /// The underlying file-system operation failed.
    Io(io::Error),

    /// The cache file was written with a different format version.
    VersionMismatch { expected: u32, found: u32 },

    /// The cache file was produced by a different Rust toolchain.
    ToolchainMismatch { expected: String, found: String },

    /// The dependency hash differs, so the universe must be recompiled.
    DependencyMismatch { expected: u64, found: u64 },

    /// The cache bytes could not be decoded; holds the decoder's message.
    Deserialize(String),

    /// The snapshot could not be encoded; holds the encoder's message.
    Serialize(String),
}
136
137impl std::fmt::Display for CacheError {
138    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139        match self {
140            CacheError::Io(e) => write!(f, "cache IO error: {}", e),
141            CacheError::VersionMismatch { expected, found } => {
142                write!(
143                    f,
144                    "cache version mismatch: expected {}, found {}",
145                    expected, found
146                )
147            }
148            CacheError::ToolchainMismatch { expected, found } => {
149                write!(
150                    f,
151                    "toolchain mismatch: expected '{}', found '{}'",
152                    expected, found
153                )
154            }
155            CacheError::DependencyMismatch { expected, found } => {
156                write!(
157                    f,
158                    "dependency hash mismatch: expected {:#x}, found {:#x}",
159                    expected, found
160                )
161            }
162            CacheError::Deserialize(e) => write!(f, "cache deserialize error: {}", e),
163            CacheError::Serialize(e) => write!(f, "cache serialize error: {}", e),
164        }
165    }
166}
167
168impl std::error::Error for CacheError {
169    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
170        match self {
171            CacheError::Io(e) => Some(e),
172            _ => None,
173        }
174    }
175}
176
177impl From<io::Error> for CacheError {
178    fn from(e: io::Error) -> Self {
179        CacheError::Io(e)
180    }
181}
182
183/// Cache persistence operations.
184pub struct CachePersistence;
185
186impl CachePersistence {
187    /// Save a cache snapshot to disk.
188    ///
189    /// Creates parent directories if they don't exist.
190    /// Uses atomic write (write to temp file, then rename) to prevent corruption.
191    pub fn save(path: &Path, snapshot: &CacheSnapshot) -> Result<(), CacheError> {
192        // Ensure parent directory exists
193        if let Some(parent) = path.parent() {
194            fs::create_dir_all(parent)?;
195        }
196
197        // Serialize with rkyv
198        let bytes = rkyv::to_bytes::<rancor::Error>(snapshot)
199            .map_err(|e| CacheError::Serialize(e.to_string()))?;
200
201        // Write to temp file first for atomic operation
202        // Use PID to make temp file unique and avoid race conditions
203        let pid = std::process::id();
204        let temp_path = path.with_extension(format!("tmp.{}", pid));
205
206        let mut file = fs::File::create(&temp_path)?;
207        file.write_all(&bytes)?;
208        file.sync_all()?;
209
210        // Atomic rename
211        let rename_result = fs::rename(&temp_path, path);
212
213        // Clean up temp file if rename failed
214        if rename_result.is_err() {
215            let _ = fs::remove_file(&temp_path);
216        }
217
218        rename_result?;
219
220        tracing::debug!(
221            "Saved cache snapshot: {} cells, {} bytes",
222            snapshot.cells.len(),
223            bytes.len()
224        );
225
226        Ok(())
227    }
228
229    /// Load a cache snapshot from disk.
230    ///
231    /// Returns `Ok(None)` if the cache file doesn't exist.
232    /// Returns `Err` if the cache exists but is invalid or incompatible.
233    ///
234    /// # Arguments
235    ///
236    /// * `path` - Path to the cache file
237    /// * `expected_toolchain` - Current toolchain version; cache is invalidated if different
238    pub fn load(path: &Path, expected_toolchain: &str) -> Result<Option<CacheSnapshot>, CacheError> {
239        // Check if cache exists
240        if !path.exists() {
241            tracing::debug!("No cache file at {:?}", path);
242            return Ok(None);
243        }
244
245        // Read cache file
246        let mut file = fs::File::open(path)?;
247        let mut bytes = Vec::new();
248        file.read_to_end(&mut bytes)?;
249
250        // Deserialize with validation
251        let archived = rkyv::access::<ArchivedCacheSnapshot, rancor::Error>(&bytes)
252            .map_err(|e| CacheError::Deserialize(e.to_string()))?;
253
254        // Check cache version
255        let found_version: u32 = archived.version.into();
256        if found_version != CACHE_VERSION {
257            return Err(CacheError::VersionMismatch {
258                expected: CACHE_VERSION,
259                found: found_version,
260            });
261        }
262
263        // Deserialize fully
264        let snapshot: CacheSnapshot =
265            rkyv::deserialize::<CacheSnapshot, rancor::Error>(archived)
266                .map_err(|e| CacheError::Deserialize(e.to_string()))?;
267
268        // Check toolchain version
269        if snapshot.toolchain_version != expected_toolchain {
270            return Err(CacheError::ToolchainMismatch {
271                expected: expected_toolchain.to_string(),
272                found: snapshot.toolchain_version.clone(),
273            });
274        }
275
276        tracing::debug!(
277            "Loaded cache snapshot: {} cells, created at {}",
278            snapshot.cells.len(),
279            snapshot.created_at
280        );
281
282        Ok(Some(snapshot))
283    }
284
285    /// Load a cache snapshot without toolchain validation.
286    ///
287    /// Use this when you want to inspect the cache or handle
288    /// validation separately.
289    pub fn load_unchecked(path: &Path) -> Result<Option<CacheSnapshot>, CacheError> {
290        if !path.exists() {
291            return Ok(None);
292        }
293
294        let mut file = fs::File::open(path)?;
295        let mut bytes = Vec::new();
296        file.read_to_end(&mut bytes)?;
297
298        let archived = rkyv::access::<ArchivedCacheSnapshot, rancor::Error>(&bytes)
299            .map_err(|e| CacheError::Deserialize(e.to_string()))?;
300
301        let found_version: u32 = archived.version.into();
302        if found_version != CACHE_VERSION {
303            return Err(CacheError::VersionMismatch {
304                expected: CACHE_VERSION,
305                found: found_version,
306            });
307        }
308
309        let snapshot: CacheSnapshot =
310            rkyv::deserialize::<CacheSnapshot, rancor::Error>(archived)
311                .map_err(|e| CacheError::Deserialize(e.to_string()))?;
312
313        Ok(Some(snapshot))
314    }
315
316    /// Delete the cache file if it exists.
317    pub fn invalidate(path: &Path) -> Result<(), CacheError> {
318        if path.exists() {
319            fs::remove_file(path)?;
320            tracing::debug!("Invalidated cache at {:?}", path);
321        }
322        Ok(())
323    }
324}
325
326impl CacheSnapshot {
327    /// Create a new cache snapshot.
328    pub fn new(toolchain_version: String, dependency_hash: u64) -> Self {
329        let created_at = SystemTime::now()
330            .duration_since(UNIX_EPOCH)
331            .map(|d| d.as_secs())
332            .unwrap_or(0);
333
334        Self {
335            version: CACHE_VERSION,
336            toolchain_version,
337            dependency_hash,
338            cells: HashMap::new(),
339            created_at,
340        }
341    }
342
343    /// Add a compiled cell to the snapshot.
344    pub fn add_cell(&mut self, cell: CachedCell) {
345        self.cells.insert(cell.name.clone(), cell);
346    }
347
348    /// Get a cached cell by name.
349    pub fn get_cell(&self, name: &str) -> Option<&CachedCell> {
350        self.cells.get(name)
351    }
352
353    /// Check if a cell's source has changed.
354    ///
355    /// Returns `true` if the cell exists in cache with the same source hash.
356    pub fn is_cell_valid(&self, name: &str, current_source_hash: u64) -> bool {
357        self.cells
358            .get(name)
359            .map(|c| c.source_hash == current_source_hash)
360            .unwrap_or(false)
361    }
362
363    /// Check if dependency hash matches.
364    pub fn is_dependency_valid(&self, current_hash: u64) -> bool {
365        self.dependency_hash == current_hash
366    }
367}
368
369impl CachedCell {
370    /// Create a new cached cell with successful compilation.
371    pub fn success(name: String, source_hash: u64, dylib_path: String) -> Self {
372        Self {
373            name,
374            source_hash,
375            dylib_path,
376            status: CachedCompilationStatus::Success,
377        }
378    }
379
380    /// Create a new cached cell using existing cache.
381    pub fn cached(name: String, source_hash: u64, dylib_path: String) -> Self {
382        Self {
383            name,
384            source_hash,
385            dylib_path,
386            status: CachedCompilationStatus::Cached,
387        }
388    }
389
390    /// Create a new cached cell with failed compilation.
391    pub fn failed(name: String, source_hash: u64, error: String) -> Self {
392        Self {
393            name,
394            source_hash,
395            dylib_path: String::new(),
396            status: CachedCompilationStatus::Failed { error },
397        }
398    }
399
400    /// Check if the cell compiled successfully (or used cache).
401    pub fn is_success(&self) -> bool {
402        matches!(
403            self.status,
404            CachedCompilationStatus::Success | CachedCompilationStatus::Cached
405        )
406    }
407}
408
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// A saved snapshot must come back with the same header and cells.
    #[test]
    fn test_cache_round_trip() {
        let tmp = tempdir().unwrap();
        let file = tmp.path().join("test_cache.bin");

        // Build a snapshot holding one successful and one failed cell.
        let mut original = CacheSnapshot::new("rustc 1.76.0-nightly".to_string(), 0x12345678);
        let ok_cell =
            CachedCell::success("cell_a".to_string(), 0xAABBCCDD, "cell_a.so".to_string());
        let bad_cell =
            CachedCell::failed("cell_b".to_string(), 0x11223344, "type mismatch".to_string());
        original.add_cell(ok_cell);
        original.add_cell(bad_cell);

        // Persist, then read back with the matching toolchain.
        CachePersistence::save(&file, &original).unwrap();
        let restored = CachePersistence::load(&file, "rustc 1.76.0-nightly")
            .unwrap()
            .unwrap();

        // Header fields survive the round trip.
        assert_eq!(restored.version, CACHE_VERSION);
        assert_eq!(restored.toolchain_version, "rustc 1.76.0-nightly");
        assert_eq!(restored.dependency_hash, 0x12345678);
        assert_eq!(restored.cells.len(), 2);

        // The successful cell keeps its hash and status.
        let first = restored.get_cell("cell_a").unwrap();
        assert_eq!(first.source_hash, 0xAABBCCDD);
        assert!(first.is_success());

        // The failed cell keeps its error message.
        let second = restored.get_cell("cell_b").unwrap();
        assert!(!second.is_success());
        assert!(matches!(
            &second.status,
            CachedCompilationStatus::Failed { error } if error == "type mismatch"
        ));
    }

    /// Loading a path that does not exist yields `Ok(None)`.
    #[test]
    fn test_cache_missing_file() {
        let tmp = tempdir().unwrap();
        let file = tmp.path().join("nonexistent.bin");

        let loaded = CachePersistence::load(&file, "rustc 1.76.0-nightly").unwrap();
        assert!(loaded.is_none());
    }

    /// A cache written by one toolchain is rejected under another.
    #[test]
    fn test_cache_toolchain_mismatch() {
        let tmp = tempdir().unwrap();
        let file = tmp.path().join("test_cache.bin");

        // Written with 1.76 ...
        let written = CacheSnapshot::new("rustc 1.76.0-nightly".to_string(), 0);
        CachePersistence::save(&file, &written).unwrap();

        // ... read back expecting 1.77.
        let outcome = CachePersistence::load(&file, "rustc 1.77.0-nightly");

        assert!(matches!(
            outcome,
            Err(CacheError::ToolchainMismatch { .. })
        ));
    }

    /// `invalidate` removes an existing cache file.
    #[test]
    fn test_cache_invalidation() {
        let tmp = tempdir().unwrap();
        let file = tmp.path().join("test_cache.bin");

        // Put a cache file in place.
        let written = CacheSnapshot::new("test".to_string(), 0);
        CachePersistence::save(&file, &written).unwrap();
        assert!(file.exists());

        // Remove it again.
        CachePersistence::invalidate(&file).unwrap();
        assert!(!file.exists());
    }

    /// `is_cell_valid` requires both a known name and a matching hash.
    #[test]
    fn test_cell_validity() {
        let mut snap = CacheSnapshot::new("test".to_string(), 0);
        let cell = CachedCell::success("test".to_string(), 0x1234, "".to_string());
        snap.add_cell(cell);

        // Matching hash
        assert!(snap.is_cell_valid("test", 0x1234));

        // Hash differs
        assert!(!snap.is_cell_valid("test", 0x5678));

        // Name not in cache
        assert!(!snap.is_cell_valid("unknown", 0x1234));
    }

    /// `is_dependency_valid` is a plain equality check on the hash.
    #[test]
    fn test_dependency_validity() {
        let snap = CacheSnapshot::new("test".to_string(), 0xABCD);

        assert!(snap.is_dependency_valid(0xABCD));
        assert!(!snap.is_dependency_valid(0x1234));
    }
}