venus_core/salsa_db/
mod.rs

//! Salsa-based incremental computation database for Venus.
//!
//! This module provides memoized queries for:
//! - Source file parsing
//! - Cell extraction
//! - Dependency graph construction
//! - Cell compilation (with caching)
//! - Cell execution status tracking
//!
//! Changes to inputs automatically invalidate dependent queries,
//! enabling efficient incremental recomputation.
//!
//! # Module Organization
//!
//! - [`inputs`] - Input types (`SourceFile`, `CompilerSettings`, `CellOutputs`)
//! - [`queries`] - Tracked query functions
//! - [`conversions`] - Type conversions for Salsa compatibility
//! - [`cache`] - Disk persistence for the compilation cache
18
19pub mod cache;
20mod conversions;
21mod inputs;
22mod queries;
23
24use std::path::PathBuf;
25use std::sync::Arc;
26
27use salsa::Setter;
28
29// Re-export public types
30pub use conversions::{
31    CellData, CellOutputData, CompilationStatus, CompiledCellData, ExecutionStatus, GraphAnalysis,
32};
33pub use inputs::{CellOutputs, CompilerSettings, SourceFile};
34pub use queries::{
35    all_cells_executed, cell_names, cell_output, cell_output_data, compile_all_cells, compiled_cell,
36    dependency_hash, execution_order, execution_order_result, graph_analysis, graph_analysis_result,
37    invalidated_by, parallel_levels, parse_cells, parse_cells_result, QueryResult,
38};
39
40/// The concrete database implementation.
41///
42/// This is the main entry point for incremental computation in Venus.
43/// Create an instance with [`VenusDatabase::new()`] and use the helper
44/// methods to interact with the incremental system.
45#[salsa::db]
46#[derive(Default, Clone)]
47pub struct VenusDatabase {
48    storage: salsa::Storage<Self>,
49}
50
51#[salsa::db]
52impl salsa::Database for VenusDatabase {}
53
54impl VenusDatabase {
55    /// Create a new Venus database.
56    pub fn new() -> Self {
57        Self::default()
58    }
59
60    // =========================================================================
61    // Source File Management
62    // =========================================================================
63
64    /// Create a new source file input.
65    ///
66    /// Returns a handle that can be used with query functions.
67    pub fn set_source(&self, path: PathBuf, text: String) -> SourceFile {
68        SourceFile::new(self, path, text)
69    }
70
71    /// Update an existing source file's content.
72    ///
73    /// This will invalidate all queries that depend on this source.
74    pub fn update_source(&mut self, source: SourceFile, text: String) {
75        source.set_text(self).to(text);
76    }
77
78    // =========================================================================
79    // Cell Queries
80    // =========================================================================
81
82    /// Parse cells from a source file.
83    ///
84    /// Returns the list of cells extracted from the source.
85    /// Returns an empty vector on parse errors.
86    pub fn get_cells(&self, source: SourceFile) -> Vec<CellData> {
87        parse_cells(self, source)
88    }
89
90    /// Parse cells from a source file with error reporting.
91    ///
92    /// Returns `QueryResult::Ok` with cells on success, or `QueryResult::Err`
93    /// with an error message on parse failure.
94    pub fn get_cells_result(&self, source: SourceFile) -> QueryResult<Vec<CellData>> {
95        parse_cells_result(self, source)
96    }
97
98    /// Get cell names from a source file.
99    pub fn get_cell_names(&self, source: SourceFile) -> Vec<String> {
100        cell_names(self, source)
101    }
102
103    /// Get the execution order for a notebook.
104    ///
105    /// Returns cell indices in topological order.
106    /// Returns an empty vector on graph errors.
107    pub fn get_execution_order(&self, source: SourceFile) -> Vec<usize> {
108        execution_order(self, source)
109    }
110
111    /// Get the execution order for a notebook with error reporting.
112    ///
113    /// Returns `QueryResult::Ok` with ordered indices on success, or
114    /// `QueryResult::Err` with an error message on graph errors (cycles,
115    /// missing dependencies, etc.).
116    pub fn get_execution_order_result(&self, source: SourceFile) -> QueryResult<Vec<usize>> {
117        execution_order_result(self, source)
118    }
119
120    /// Get cells invalidated by a change.
121    ///
122    /// Returns all cells that need to be re-executed when the given cell changes.
123    pub fn get_invalidated(&self, source: SourceFile, changed_idx: usize) -> Vec<usize> {
124        invalidated_by(self, source, changed_idx)
125    }
126
127    /// Get parallel execution levels.
128    ///
129    /// Returns groups of cells that can be executed in parallel.
130    pub fn get_parallel_levels(&self, source: SourceFile) -> Vec<Vec<usize>> {
131        parallel_levels(self, source)
132    }
133
134    // =========================================================================
135    // Compilation
136    // =========================================================================
137
138    /// Create compiler settings input.
139    pub fn create_compiler_settings(
140        &self,
141        build_dir: PathBuf,
142        cache_dir: PathBuf,
143        universe_path: Option<PathBuf>,
144        use_cranelift: bool,
145        opt_level: u8,
146    ) -> CompilerSettings {
147        CompilerSettings::new(
148            self,
149            build_dir,
150            cache_dir,
151            universe_path,
152            use_cranelift,
153            opt_level,
154        )
155    }
156
157    /// Get the dependency hash for a notebook.
158    ///
159    /// This hash represents all external crate dependencies.
160    pub fn get_dependency_hash(&self, source: SourceFile) -> u64 {
161        dependency_hash(self, source)
162    }
163
164    /// Compile a specific cell.
165    ///
166    /// Returns the compilation status (success, cached, or failed).
167    pub fn compile_cell(
168        &self,
169        source: SourceFile,
170        cell_idx: usize,
171        settings: CompilerSettings,
172    ) -> CompilationStatus {
173        compiled_cell(self, source, cell_idx, settings)
174    }
175
176    /// Compile all cells in execution order.
177    ///
178    /// Returns compilation results for all cells.
179    pub fn compile_all(
180        &self,
181        source: SourceFile,
182        settings: CompilerSettings,
183    ) -> Arc<Vec<CompilationStatus>> {
184        compile_all_cells(self, source, settings)
185    }
186
187    // =========================================================================
188    // Cell Outputs
189    // =========================================================================
190
191    /// Create a new cell outputs input with all cells pending.
192    ///
193    /// Call this after parsing cells to initialize the outputs tracking.
194    pub fn create_cell_outputs(&self, cell_count: usize) -> CellOutputs {
195        CellOutputs::new(
196            self,
197            Arc::new(vec![ExecutionStatus::Pending; cell_count]),
198            0,
199        )
200    }
201
202    /// Update the execution status for a specific cell.
203    ///
204    /// This will increment the version counter and invalidate any
205    /// queries that depend on this cell's output.
206    ///
207    /// # Panics
208    ///
209    /// In debug builds, panics if `cell_idx` is out of bounds. In release
210    /// builds, out-of-bounds indices are silently ignored (but version is
211    /// still incremented, which may cause unnecessary invalidations).
212    ///
213    /// # Example
214    ///
215    /// ```ignore
216    /// let outputs = db.create_cell_outputs(3);
217    /// db.set_cell_output(outputs, 0, ExecutionStatus::Running);
218    /// ```
219    pub fn set_cell_output(
220        &mut self,
221        outputs: CellOutputs,
222        cell_idx: usize,
223        status: ExecutionStatus,
224    ) {
225        let mut statuses = (*outputs.statuses(self)).clone();
226
227        // Debug assertion to catch programming errors early
228        debug_assert!(
229            cell_idx < statuses.len(),
230            "cell_idx {} is out of bounds (len={}). \
231             Did you forget to call create_cell_outputs() with the correct count?",
232            cell_idx,
233            statuses.len()
234        );
235
236        if cell_idx < statuses.len() {
237            statuses[cell_idx] = status;
238            let new_version = outputs.version(self) + 1;
239            outputs.set_statuses(self).to(Arc::new(statuses));
240            outputs.set_version(self).to(new_version);
241        } else {
242            // Log warning in release builds for diagnosability
243            tracing::warn!(
244                "Attempted to set output for cell {} but only {} cells exist",
245                cell_idx,
246                statuses.len()
247            );
248        }
249    }
250
251    /// Get the execution status for a specific cell.
252    ///
253    /// Returns `ExecutionStatus::Pending` if the cell index is out of bounds.
254    pub fn get_cell_output(&self, outputs: CellOutputs, cell_idx: usize) -> ExecutionStatus {
255        cell_output(self, outputs, cell_idx)
256    }
257
258    /// Get the output data for a cell if it executed successfully.
259    ///
260    /// Returns `None` if the cell is pending, running, failed, or out of bounds.
261    pub fn get_cell_output_data(&self, outputs: CellOutputs, cell_idx: usize) -> Option<CellOutputData> {
262        cell_output_data(self, outputs, cell_idx)
263    }
264
265    /// Check if all cells have finished executing.
266    pub fn are_all_cells_executed(&self, outputs: CellOutputs) -> bool {
267        all_cells_executed(self, outputs)
268    }
269
270    /// Mark a cell as currently running.
271    ///
272    /// # Panics
273    ///
274    /// In debug builds, panics if `cell_idx` is out of bounds.
275    pub fn mark_cell_running(&mut self, outputs: CellOutputs, cell_idx: usize) {
276        self.set_cell_output(outputs, cell_idx, ExecutionStatus::Running);
277    }
278
279    /// Mark a cell as failed with an error message.
280    ///
281    /// # Panics
282    ///
283    /// In debug builds, panics if `cell_idx` is out of bounds.
284    pub fn mark_cell_failed(&mut self, outputs: CellOutputs, cell_idx: usize, error: String) {
285        self.set_cell_output(outputs, cell_idx, ExecutionStatus::Failed(error));
286    }
287
288    /// Mark a cell as successfully executed with output data.
289    ///
290    /// # Panics
291    ///
292    /// In debug builds, panics if `cell_idx` is out of bounds.
293    pub fn mark_cell_success(&mut self, outputs: CellOutputs, cell_idx: usize, output: CellOutputData) {
294        self.set_cell_output(outputs, cell_idx, ExecutionStatus::Success(output));
295    }
296
297    // =========================================================================
298    // Cache Persistence
299    // =========================================================================
300
301    /// Create a cache snapshot from current compilation state.
302    ///
303    /// This captures all successfully compiled cells so they can be
304    /// restored on the next startup without recompilation.
305    ///
306    /// # Arguments
307    ///
308    /// * `toolchain_version` - Current rustc version string
309    /// * `dependency_hash` - Hash of external dependencies
310    /// * `cells` - List of (name, source_hash, compilation_status)
311    ///
312    /// # Example
313    ///
314    /// ```ignore
315    /// let snapshot = db.create_cache_snapshot(
316    ///     toolchain.version().to_string(),
317    ///     db.get_dependency_hash(source),
318    ///     compiled_cells,
319    /// );
320    /// CachePersistence::save(&cache_path, &snapshot)?;
321    /// ```
322    pub fn create_cache_snapshot(
323        &self,
324        toolchain_version: String,
325        dependency_hash: u64,
326        cells: Vec<(String, u64, CompilationStatus)>,
327    ) -> cache::CacheSnapshot {
328        let mut snapshot = cache::CacheSnapshot::new(toolchain_version, dependency_hash);
329
330        for (name, source_hash, status) in cells {
331            let cached_cell = match status {
332                CompilationStatus::Success(ref data) => cache::CachedCell::success(
333                    name,
334                    source_hash,
335                    data.dylib_path.to_string_lossy().to_string(),
336                ),
337                CompilationStatus::Cached(ref data) => cache::CachedCell::cached(
338                    name,
339                    source_hash,
340                    data.dylib_path.to_string_lossy().to_string(),
341                ),
342                CompilationStatus::Failed(ref error) => {
343                    cache::CachedCell::failed(name, source_hash, error.clone())
344                }
345            };
346            snapshot.add_cell(cached_cell);
347        }
348
349        snapshot
350    }
351
352    /// Check if a cached cell can be reused.
353    ///
354    /// Returns `true` if the cell exists in the cache with a matching
355    /// source hash and successful compilation status.
356    pub fn is_cell_cached(
357        &self,
358        snapshot: &cache::CacheSnapshot,
359        cell_name: &str,
360        current_source_hash: u64,
361    ) -> bool {
362        snapshot
363            .get_cell(cell_name)
364            .map(|c| c.source_hash == current_source_hash && c.is_success())
365            .unwrap_or(false)
366    }
367
368    /// Get the dylib path for a cached cell.
369    ///
370    /// Returns `None` if the cell is not in cache or failed compilation.
371    pub fn get_cached_dylib_path(
372        &self,
373        snapshot: &cache::CacheSnapshot,
374        cell_name: &str,
375    ) -> Option<PathBuf> {
376        snapshot.get_cell(cell_name).and_then(|c| {
377            if c.is_success() && !c.dylib_path.is_empty() {
378                Some(PathBuf::from(&c.dylib_path))
379            } else {
380                None
381            }
382        })
383    }
384}
385
#[cfg(test)]
mod tests {
    use super::*;

    /// Constructing a database must not panic.
    #[test]
    fn test_database_creation() {
        drop(VenusDatabase::new());
    }

    /// A freshly created source input reports the path it was created with.
    #[test]
    fn test_source_file_input() {
        let db = VenusDatabase::new();
        let path = PathBuf::from("test.rs");

        let source = db.set_source(
            path.clone(),
            r#"
                #[venus::cell]
                pub fn config() -> i32 { 42 }
            "#
            .to_string(),
        );

        assert_eq!(source.path(&db), path);
    }

    /// Replacing the source text is reflected by the next query.
    #[test]
    fn test_incremental_update() {
        let mut db = VenusDatabase::new();

        // Start with a single cell.
        let source = db.set_source(
            PathBuf::from("test.rs"),
            r#"
                #[venus::cell]
                pub fn a() -> i32 { 1 }
            "#
            .to_string(),
        );

        let before = db.get_execution_order(source);
        assert_eq!(before.len(), 1);

        // Replace the text with a version that adds a dependent cell.
        db.update_source(
            source,
            r#"
                #[venus::cell]
                pub fn a() -> i32 { 1 }

                #[venus::cell]
                pub fn b(a: &i32) -> i32 { *a + 1 }
            "#
            .to_string(),
        );

        // The same handle now yields the recomputed order.
        let after = db.get_execution_order(source);
        assert_eq!(after.len(), 2);
    }

    /// Compiler settings round-trip through the database unchanged.
    #[test]
    fn test_compiler_settings() {
        let db = VenusDatabase::new();
        let build_dir = PathBuf::from(".venus/build");
        let cache_dir = PathBuf::from(".venus/cache");
        let universe = PathBuf::from(".venus/universe/libvenus_universe.so");

        let settings = db.create_compiler_settings(
            build_dir.clone(),
            cache_dir.clone(),
            Some(universe),
            true,
            0,
        );

        assert_eq!(settings.build_dir(&db), build_dir);
        assert_eq!(settings.cache_dir(&db), cache_dir);
        assert!(settings.use_cranelift(&db));
        assert_eq!(settings.opt_level(&db), 0);
    }

    /// New outputs start out pending for every cell.
    #[test]
    fn test_cell_outputs_creation() {
        let db = VenusDatabase::new();
        let outputs = db.create_cell_outputs(3);

        for cell_idx in 0..3 {
            let status = db.get_cell_output(outputs, cell_idx);
            assert!(matches!(status, ExecutionStatus::Pending));
        }

        // Nothing has run yet, so the notebook is not fully executed.
        assert!(!db.are_all_cells_executed(outputs));
    }

    /// Status transitions made through the `mark_*` helpers are observable.
    #[test]
    fn test_cell_output_updates() {
        let mut db = VenusDatabase::new();
        let outputs = db.create_cell_outputs(2);

        // Cell 0: pending -> running
        db.mark_cell_running(outputs, 0);
        let running = db.get_cell_output(outputs, 0);
        assert!(matches!(running, ExecutionStatus::Running));

        // Cell 0: running -> failed
        db.mark_cell_failed(outputs, 0, "error message".to_string());
        let failed = db.get_cell_output(outputs, 0);
        assert!(matches!(failed, ExecutionStatus::Failed(_)));

        // Cell 1: pending -> success, carrying output data
        db.mark_cell_success(
            outputs,
            1,
            CellOutputData {
                cell_id: 1,
                bytes: vec![1, 2, 3],
                type_hash: 12345,
                type_name: "i32".to_string(),
                inputs_hash: 67890,
                execution_time_ms: 100,
            },
        );

        // The stored data can be read back.
        let retrieved = db.get_cell_output_data(outputs, 1);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().cell_id, 1);

        // One failed and one succeeded: both count as executed.
        assert!(db.are_all_cells_executed(outputs));
    }
}