Skip to main content

runmat_snapshot/
lib.rs

//! # RunMat Snapshot Creator
//!
//! High-performance snapshot system for preloading the RunMat standard library.
//! Inspired by V8's snapshot architecture, this provides:
//!
//! - **Zero-copy serialization** with memory mapping
//! - **Multi-tier compression** with LZ4 and ZSTD
//! - **Integrity validation** with SHA-256 checksums
//! - **Concurrent loading** with lock-free data structures
//! - **Progressive enhancement** with fallback mechanisms
//!
//! ## Architecture
//!
//! ```text
//! ┌─────────────────┐    ┌──────────────────┐    ┌─────────────────┐
//! │  Standard Lib   │ -> │   Snapshot       │ -> │   Runtime       │
//! │  Components     │    │   Generator      │    │   Loader        │
//! │                 │    │                  │    │                 │
//! │ • Builtins      │    │ • Serialization  │    │ • Memory Map    │
//! │ • HIR Cache     │    │ • Compression    │    │ • Validation    │
//! │ • Bytecode      │    │ • Validation     │    │ • Integration   │
//! │ • GC Presets    │    │ • Optimization   │    │ • Performance   │
//! └─────────────────┘    └──────────────────┘    └─────────────────┘
//! ```
25
26use std::collections::HashMap;
27use std::path::{Path, PathBuf};
28use std::sync::Arc;
29
30/// Type alias for builtin function dispatch table to reduce complexity
31type BuiltinDispatchTable =
32    Arc<RwLock<Vec<fn(&[runmat_builtins::Value]) -> runmat_builtins::BuiltinFuture>>>;
33use std::time::Duration;
34
35use parking_lot::RwLock;
36use serde::{Deserialize, Serialize};
37
38pub mod builder;
39pub mod compression;
40pub mod format;
41pub mod loader;
42pub mod presets;
43pub mod validation;
44
45pub use builder::SnapshotBuilder;
46pub use format::{SnapshotFormat, SnapshotHeader, SnapshotMetadata};
47pub use loader::SnapshotLoader;
48
49/// Core snapshot data containing preloaded standard library components
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct Snapshot {
52    /// Snapshot metadata
53    pub metadata: SnapshotMetadata,
54
55    /// Preloaded builtin functions with optimized dispatch table
56    pub builtins: BuiltinRegistry,
57
58    /// Cached HIR representations of standard library functions
59    pub hir_cache: HirCache,
60
61    /// Precompiled bytecode for common operations
62    pub bytecode_cache: BytecodeCache,
63
64    /// GC configuration presets
65    pub gc_presets: GcPresetCache,
66
67    /// Runtime optimization hints
68    pub optimization_hints: OptimizationHints,
69}
70
71/// Optimized builtin function registry for fast dispatch
72#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct BuiltinRegistry {
74    /// Function name to index mapping for O(1) lookup
75    pub name_index: HashMap<String, usize>,
76
77    /// Function metadata array (aligned for cache efficiency)
78    pub functions: Vec<BuiltinMetadata>,
79
80    /// Function dispatch table (runtime-generated)
81    #[serde(skip)]
82    pub dispatch_table: BuiltinDispatchTable,
83}
84
85/// Metadata for a builtin function
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct BuiltinMetadata {
88    pub name: String,
89    pub arity: BuiltinArity,
90    pub category: BuiltinCategory,
91    pub complexity: ComputationalComplexity,
92    pub optimization_level: OptimizationLevel,
93}
94
95/// Function arity specification
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub enum BuiltinArity {
98    /// Exact number of arguments
99    Exact(usize),
100    /// Range of arguments (min, max)
101    Range(usize, usize),
102    /// Variadic (minimum arguments)
103    Variadic(usize),
104}
105
106/// Builtin function categories for optimization
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub enum BuiltinCategory {
109    Math,
110    LinearAlgebra,
111    Statistics,
112    MatrixOps,
113    Trigonometric,
114    Comparison,
115    Utility,
116}
117
118/// Computational complexity for scheduling hints
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub enum ComputationalComplexity {
121    Constant,
122    Linear,
123    Quadratic,
124    Cubic,
125    Exponential,
126}
127
128/// Optimization level for JIT compilation hints
129#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
130pub enum OptimizationLevel {
131    None,
132    Basic,
133    Aggressive,
134    MaxPerformance,
135}
136
137/// Cached HIR representations
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct HirCache {
140    /// Standard library function HIR
141    pub functions: HashMap<String, runmat_hir::HirProgram>,
142
143    /// Common expression patterns
144    pub patterns: Vec<HirPattern>,
145
146    /// Type inference cache
147    pub type_cache: HashMap<String, runmat_hir::Type>,
148}
149
150/// HIR pattern for common expressions
151#[derive(Debug, Clone, Serialize, Deserialize)]
152pub struct HirPattern {
153    pub name: String,
154    pub pattern: runmat_hir::HirProgram,
155    pub frequency: u32,
156    pub optimization_priority: OptimizationLevel,
157}
158
159/// Precompiled bytecode cache
160#[derive(Debug, Clone, Serialize, Deserialize)]
161pub struct BytecodeCache {
162    /// Standard library bytecode
163    pub stdlib_bytecode: HashMap<String, runmat_vm::Bytecode>,
164
165    /// Common operation bytecode sequences
166    pub operation_sequences: Vec<BytecodeSequence>,
167
168    /// Hotspot bytecode (frequently executed)
169    pub hotspots: Vec<HotspotBytecode>,
170}
171
172/// Bytecode sequence for common operations
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct BytecodeSequence {
175    pub name: String,
176    pub bytecode: runmat_vm::Bytecode,
177    pub usage_count: u64,
178    pub average_execution_time: Duration,
179}
180
181/// Hotspot bytecode with JIT compilation hints
182#[derive(Debug, Clone, Serialize, Deserialize)]
183pub struct HotspotBytecode {
184    pub name: String,
185    pub bytecode: runmat_vm::Bytecode,
186    pub execution_frequency: u64,
187    pub jit_compilation_threshold: u32,
188    pub optimization_hints: Vec<OptimizationHint>,
189}
190
191/// GC configuration presets
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct GcPresetCache {
194    /// Named GC configurations
195    pub presets: HashMap<String, runmat_gc::GcConfig>,
196
197    /// Default preset name
198    pub default_preset: String,
199
200    /// Performance characteristics for each preset
201    pub performance_profiles: HashMap<String, GcPerformanceProfile>,
202}
203
204/// GC performance profile
205#[derive(Debug, Clone, Serialize, Deserialize)]
206pub struct GcPerformanceProfile {
207    pub average_allocation_rate: f64,
208    pub average_collection_time: Duration,
209    pub memory_overhead: f64,
210    pub throughput_impact: f64,
211}
212
213/// Runtime optimization hints
214#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct OptimizationHints {
216    /// JIT compilation hints
217    pub jit_hints: Vec<JitHint>,
218
219    /// Memory layout hints
220    pub memory_hints: Vec<MemoryHint>,
221
222    /// Execution pattern hints
223    pub execution_hints: Vec<ExecutionHint>,
224}
225
226/// JIT compilation hint
227#[derive(Debug, Clone, Serialize, Deserialize)]
228pub struct JitHint {
229    pub pattern: String,
230    pub hint_type: JitHintType,
231    pub priority: OptimizationLevel,
232    pub expected_performance_gain: f64,
233}
234
235/// Types of JIT hints
236#[derive(Debug, Clone, Serialize, Deserialize)]
237pub enum JitHintType {
238    InlineCandidate,
239    LoopOptimization,
240    VectorizeCandidate,
241    ConstantFolding,
242    DeadCodeElimination,
243}
244
245/// Memory layout optimization hint
246#[derive(Debug, Clone, Serialize, Deserialize)]
247pub struct MemoryHint {
248    pub data_structure: String,
249    pub hint_type: MemoryHintType,
250    pub alignment: usize,
251    pub prefetch_pattern: PrefetchPattern,
252}
253
254/// Types of memory hints
255#[derive(Debug, Clone, Serialize, Deserialize)]
256pub enum MemoryHintType {
257    CacheLocalityOptimization,
258    PrefetchOptimization,
259    AlignmentOptimization,
260    CompressionCandidate,
261}
262
263/// Memory prefetch patterns
264#[derive(Debug, Clone, Serialize, Deserialize)]
265pub enum PrefetchPattern {
266    Sequential,
267    Random,
268    Strided(usize),
269    Hierarchical,
270}
271
272/// Execution pattern hint
273#[derive(Debug, Clone, Serialize, Deserialize)]
274pub struct ExecutionHint {
275    pub pattern: String,
276    pub hint_type: ExecutionHintType,
277    pub frequency: u64,
278    pub optimization_potential: f64,
279}
280
281/// Types of execution hints
282#[derive(Debug, Clone, Serialize, Deserialize)]
283pub enum ExecutionHintType {
284    HotPath,
285    ColdPath,
286    BranchPrediction,
287    ParallelizationCandidate,
288}
289
290/// Optimization hint for hotspot bytecode
291#[derive(Debug, Clone, Serialize, Deserialize)]
292pub struct OptimizationHint {
293    pub hint_type: String,
294    pub parameters: HashMap<String, String>,
295    pub expected_speedup: f64,
296}
297
/// Timing and size measurements recorded for one snapshot load.
#[derive(Debug, Clone)]
pub struct LoadingStats {
    /// Load time; the denominator of [`LoadingStats::loading_throughput`].
    pub load_time: Duration,
    /// Time attributed to decompression.
    pub decompression_time: Duration,
    /// Time attributed to validation.
    pub validation_time: Duration,
    /// Time attributed to initialization.
    pub initialization_time: Duration,
    /// Reference size in bytes; the denominator of
    /// [`LoadingStats::compression_efficiency`] (presumably the uncompressed size).
    pub total_size: u64,
    /// Compressed size, in the same unit as `total_size`.
    pub compressed_size: u64,
    /// Stored compression ratio. It is not derived from the size fields by
    /// anything in this type.
    pub compression_ratio: f64,
    /// Number of builtins in the loaded snapshot.
    pub builtin_count: u64,
    /// Cache hit rate recorded for the load.
    pub cache_hit_rate: f64,
}

impl LoadingStats {
    /// Fraction of `total_size` saved by compression:
    /// `1 - compressed_size / total_size`.
    ///
    /// Returns `0.0` when `total_size` is zero, so an empty snapshot reports
    /// "nothing saved" instead of NaN or an infinity.
    pub fn compression_efficiency(&self) -> f64 {
        if self.total_size == 0 {
            return 0.0;
        }
        1.0 - (self.compressed_size as f64 / self.total_size as f64)
    }

    /// Load throughput: `total_size` divided by `load_time` in seconds.
    ///
    /// Returns `0.0` when `load_time` is zero, because the rate is undefined
    /// there and the unguarded division would produce NaN or an infinity.
    pub fn loading_throughput(&self) -> f64 {
        let seconds = self.load_time.as_secs_f64();
        if seconds > 0.0 {
            self.total_size as f64 / seconds
        } else {
            0.0
        }
    }
}
321
322/// Error types for snapshot operations
323#[derive(thiserror::Error, Debug)]
324pub enum SnapshotError {
325    #[error("IO error: {0}")]
326    Io(#[from] std::io::Error),
327
328    #[error("Serialization error: {0}")]
329    Serialization(#[from] bincode::Error),
330
331    #[error("Compression error: {message}")]
332    Compression { message: String },
333
334    #[error("Validation error: {message}")]
335    Validation { message: String },
336
337    #[error("Version mismatch: expected {expected}, found {found}")]
338    VersionMismatch { expected: String, found: String },
339
340    #[error("Corrupted snapshot: {reason}")]
341    Corrupted { reason: String },
342
343    #[error("Configuration error: {message}")]
344    Configuration { message: String },
345}
346
347/// Result type for snapshot operations
348pub type SnapshotResult<T> = std::result::Result<T, SnapshotError>;
349
350/// Snapshot configuration
351#[derive(Debug, Clone)]
352pub struct SnapshotConfig {
353    /// Enable compression
354    pub compression_enabled: bool,
355
356    /// Compression algorithm
357    pub compression_algorithm: CompressionAlgorithm,
358
359    /// Compression level (1-9)
360    pub compression_level: u32,
361
362    /// Enable validation
363    pub validation_enabled: bool,
364
365    /// Memory mapping for loading
366    pub memory_mapping_enabled: bool,
367
368    /// Parallel loading
369    pub parallel_loading: bool,
370
371    /// Progress reporting
372    pub progress_reporting: bool,
373
374    /// Maximum optimization level to apply while building snapshot hints
375    pub max_optimization_level: OptimizationLevel,
376
377    /// Maximum cache size
378    pub max_cache_size: usize,
379
380    /// Cache eviction policy
381    pub cache_eviction_policy: CacheEvictionPolicy,
382}
383
/// Compression algorithm options for [`SnapshotConfig`].
///
/// Comparable with `==`, so callers can branch on the configured algorithm
/// without a `match`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CompressionAlgorithm {
    /// No compression.
    None,
    /// LZ4 compression.
    Lz4,
    /// Zstandard compression.
    Zstd,
    /// Choose the best algorithm based on data characteristics.
    Auto,
}
392
/// Cache eviction policies selectable through [`SnapshotConfig`].
///
/// Comparable with `==`; two `TimeToLive` values are equal only when their
/// durations are equal.
///
/// NOTE(review): nothing in this file applies the policy — confirm where
/// eviction is implemented.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CacheEvictionPolicy {
    /// Evict the least recently used entry.
    LeastRecentlyUsed,
    /// Evict the least frequently used entry.
    LeastFrequentlyUsed,
    /// Time-to-live policy carrying the lifetime as a `Duration`.
    TimeToLive(Duration),
    /// Adaptive policy (selection strategy not defined in this file).
    Adaptive,
}
401
402impl Default for SnapshotConfig {
403    fn default() -> Self {
404        Self {
405            compression_enabled: true,
406            compression_algorithm: CompressionAlgorithm::Auto,
407            compression_level: 6,
408            validation_enabled: true,
409            memory_mapping_enabled: true,
410            parallel_loading: true,
411            progress_reporting: false,
412            max_optimization_level: OptimizationLevel::MaxPerformance,
413            max_cache_size: 128 * 1024 * 1024, // 128MB
414            cache_eviction_policy: CacheEvictionPolicy::Adaptive,
415        }
416    }
417}
418
419/// Main snapshot interface
420pub struct SnapshotManager {
421    config: SnapshotConfig,
422    cache: Arc<RwLock<HashMap<PathBuf, Arc<Snapshot>>>>,
423    stats: Arc<RwLock<HashMap<PathBuf, LoadingStats>>>,
424}
425
426impl SnapshotManager {
427    /// Create a new snapshot manager
428    pub fn new(config: SnapshotConfig) -> Self {
429        Self {
430            config,
431            cache: Arc::new(RwLock::new(HashMap::new())),
432            stats: Arc::new(RwLock::new(HashMap::new())),
433        }
434    }
435
436    /// Create a snapshot from the current standard library
437    pub fn create_snapshot<P: AsRef<Path>>(&self, output_path: P) -> SnapshotResult<()> {
438        let builder = SnapshotBuilder::new(self.config.clone());
439        builder.build_and_save(output_path)
440    }
441
442    /// Load a snapshot from disk
443    pub fn load_snapshot<P: AsRef<Path>>(&self, snapshot_path: P) -> SnapshotResult<Arc<Snapshot>> {
444        let path = snapshot_path.as_ref().to_path_buf();
445
446        // Check cache first
447        {
448            let cache = self.cache.read();
449            if let Some(snapshot) = cache.get(&path) {
450                return Ok(Arc::clone(snapshot));
451            }
452        }
453
454        // Load from disk
455        let mut loader = SnapshotLoader::new(self.config.clone());
456        let (snapshot, stats) = loader.load(&path)?;
457        let snapshot = Arc::new(snapshot);
458
459        // Update cache and stats
460        {
461            let mut cache = self.cache.write();
462            cache.insert(path.clone(), Arc::clone(&snapshot));
463        }
464        {
465            let mut stats_map = self.stats.write();
466            stats_map.insert(path, stats);
467        }
468
469        Ok(snapshot)
470    }
471
472    /// Get loading statistics for a snapshot
473    pub fn get_stats<P: AsRef<Path>>(&self, snapshot_path: P) -> Option<LoadingStats> {
474        let stats = self.stats.read();
475        stats.get(snapshot_path.as_ref()).cloned()
476    }
477
478    /// Clear snapshot cache
479    pub fn clear_cache(&self) {
480        let mut cache = self.cache.write();
481        cache.clear();
482        let mut stats = self.stats.write();
483        stats.clear();
484    }
485
486    /// Get cache statistics
487    pub fn cache_stats(&self) -> (usize, usize) {
488        let cache = self.cache.read();
489        let total_size = cache
490            .values()
491            .map(|snapshot| bincode::serialized_size(&**snapshot).unwrap_or(0) as usize)
492            .sum();
493        (cache.len(), total_size)
494    }
495}
496
497impl Default for SnapshotManager {
498    fn default() -> Self {
499        Self::new(SnapshotConfig::default())
500    }
501}
502
#[cfg(test)]
mod tests {
    use super::*;

    /// Relative tolerance for comparing computed `f64` results.
    const TOLERANCE: f64 = 1e-9;

    /// Asserts that `actual` equals `expected` within `TOLERANCE`, scaled by
    /// the magnitude of `expected`, so the tests do not depend on
    /// bit-for-bit floating-point equality.
    fn assert_close(actual: f64, expected: f64) {
        let allowed = TOLERANCE * expected.abs().max(1.0);
        assert!(
            (actual - expected).abs() <= allowed,
            "expected {expected}, got {actual}"
        );
    }

    /// The default configuration enables compression, validation, memory
    /// mapping and parallel loading.
    #[test]
    fn test_snapshot_config_default() {
        let config = SnapshotConfig::default();
        assert!(config.compression_enabled);
        assert!(config.validation_enabled);
        assert!(config.memory_mapping_enabled);
        assert!(config.parallel_loading);
    }

    /// A freshly created manager has an empty cache.
    #[test]
    fn test_snapshot_manager_creation() {
        let manager = SnapshotManager::default();
        let (cache_entries, cache_size) = manager.cache_stats();
        assert_eq!(cache_entries, 0);
        assert_eq!(cache_size, 0);
    }

    /// Derived statistics match hand-computed values.
    #[test]
    fn test_loading_stats_calculations() {
        let stats = LoadingStats {
            load_time: Duration::from_millis(100),
            decompression_time: Duration::from_millis(20),
            validation_time: Duration::from_millis(10),
            initialization_time: Duration::from_millis(5),
            total_size: 1000,
            compressed_size: 600,
            compression_ratio: 0.4,
            builtin_count: 50,
            cache_hit_rate: 0.8,
        };

        assert_close(stats.compression_efficiency(), 0.4);
        assert_close(stats.loading_throughput(), 10000.0); // bytes per second
    }
}