runmat_snapshot/
lib.rs

1//! # RunMat Snapshot Creator
2//!
3//! High-performance snapshot system for preloading the RunMat standard library.
4//! Inspired by V8's snapshot architecture, this provides:
5//!
6//! - **Zero-copy serialization** with memory mapping
7//! - **Multi-tier compression** with LZ4 and ZSTD
8//! - **Integrity validation** with SHA-256 checksums  
9//! - **Concurrent loading** with lock-free data structures
10//! - **Progressive enhancement** with fallback mechanisms
11//!
12//! ## Architecture
13//!
14//! ```text
15//! ┌─────────────────┐    ┌──────────────────┐    ┌─────────────────┐
16//! │  Standard Lib   │ -> │   Snapshot       │ -> │   Runtime       │
17//! │  Components     │    │   Generator      │    │   Loader        │
18//! │                 │    │                  │    │                 │
19//! │ • Builtins      │    │ • Serialization  │    │ • Memory Map    │
20//! │ • HIR Cache     │    │ • Compression    │    │ • Validation    │
21//! │ • Bytecode      │    │ • Validation     │    │ • Integration   │
22//! │ • GC Presets    │    │ • Optimization   │    │ • Performance   │
23//! └─────────────────┘    └──────────────────┘    └─────────────────┘
24//! ```
25
26use std::collections::HashMap;
27use std::path::{Path, PathBuf};
28use std::sync::Arc;
29
/// Shared, lock-guarded table of builtin function pointers.
///
/// Each entry is a plain `fn` pointer that receives its arguments as a slice of
/// `runmat_builtins::Value` and returns either a `Value` or a `String` error.
/// The alias exists to keep the nested `Arc<RwLock<Vec<...>>>` type readable
/// (and to keep clippy's type-complexity lint quiet) at its use sites.
type BuiltinDispatchTable =
    Arc<RwLock<Vec<fn(&[runmat_builtins::Value]) -> Result<runmat_builtins::Value, String>>>>;
33use std::time::Duration;
34
35use parking_lot::RwLock;
36use serde::{Deserialize, Serialize};
37
38pub mod builder;
39pub mod compression;
40pub mod format;
41pub mod loader;
42pub mod presets;
43pub mod validation;
44
45pub use builder::SnapshotBuilder;
46pub use format::{SnapshotFormat, SnapshotHeader, SnapshotMetadata};
47pub use loader::SnapshotLoader;
48
/// Core snapshot data containing preloaded standard library components.
///
/// This is the top-level value that is serialized and deserialized through the
/// serde derives, and the value that `SnapshotManager` keeps in its cache
/// behind an `Arc`.
///
/// NOTE(review): this file measures snapshots with `bincode`; if the on-disk
/// format is bincode as well, field order is part of the format — confirm
/// before reordering fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Snapshot {
    /// Snapshot metadata (type re-exported from the `format` module).
    pub metadata: SnapshotMetadata,

    /// Preloaded builtin functions with optimized dispatch table.
    pub builtins: BuiltinRegistry,

    /// Cached HIR representations of standard library functions.
    pub hir_cache: HirCache,

    /// Precompiled bytecode for common operations.
    pub bytecode_cache: BytecodeCache,

    /// GC configuration presets.
    pub gc_presets: GcPresetCache,

    /// Runtime optimization hints.
    pub optimization_hints: OptimizationHints,
}
70
/// Optimized builtin function registry for fast dispatch.
///
/// Only `name_index` and `functions` are part of the serialized form; the
/// dispatch table is skipped by serde (see the field note below).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuiltinRegistry {
    /// Function name to index mapping for O(1) (hash-map) lookup.
    ///
    /// NOTE(review): presumably the index addresses `functions` and/or
    /// `dispatch_table`; the code that fills the map is not in this file —
    /// confirm against the builder/loader.
    pub name_index: HashMap<String, usize>,

    /// Function metadata array.
    ///
    /// NOTE(review): the original comment described this as "aligned for cache
    /// efficiency"; no alignment attribute is visible on `BuiltinMetadata`.
    pub functions: Vec<BuiltinMetadata>,

    /// Function dispatch table (runtime-generated).
    ///
    /// Because of `#[serde(skip)]` this field is never written to a snapshot,
    /// and serde fills it with its `Default` value (an empty table) when a
    /// registry is deserialized. Cloning the registry clones the `Arc`, so
    /// clones share one underlying table.
    #[serde(skip)]
    pub dispatch_table: BuiltinDispatchTable,
}
84
/// Metadata for a builtin function.
///
/// One record per builtin; `BuiltinRegistry::functions` stores these in a `Vec`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuiltinMetadata {
    /// Name of the builtin function.
    pub name: String,
    /// Accepted argument count (exact, ranged, or variadic).
    pub arity: BuiltinArity,
    /// Category the builtin is filed under.
    pub category: BuiltinCategory,
    /// Complexity class recorded for the builtin.
    pub complexity: ComputationalComplexity,
    /// Optimization level recorded for the builtin.
    pub optimization_level: OptimizationLevel,
}
94
/// Function arity specification: how many arguments a builtin accepts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BuiltinArity {
    /// Exact number of arguments.
    Exact(usize),
    /// Range of arguments, stored as `(min, max)`.
    ///
    /// NOTE(review): whether `max` is inclusive is not established in this file.
    Range(usize, usize),
    /// Variadic; the payload is the minimum number of arguments.
    Variadic(usize),
}
105
/// Builtin function categories for optimization.
///
/// Stored per builtin in `BuiltinMetadata::category`. The rules that assign a
/// builtin to a category are not visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BuiltinCategory {
    Math,
    LinearAlgebra,
    Statistics,
    MatrixOps,
    Trigonometric,
    Comparison,
    Utility,
}
117
/// Computational complexity for scheduling hints.
///
/// Variants are named after the usual growth classes. What input dimension
/// they are measured against is not specified in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ComputationalComplexity {
    Constant,
    Linear,
    Quadratic,
    Cubic,
    Exponential,
}
127
/// Optimization level for JIT compilation hints.
///
/// Reused in this file as `BuiltinMetadata::optimization_level`,
/// `HirPattern::optimization_priority` and `JitHint::priority`.
///
/// NOTE(review): the variants look ordered from least to most aggressive, but
/// the type derives no ordering traits, so nothing here relies on that order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OptimizationLevel {
    None,
    Basic,
    Aggressive,
    MaxPerformance,
}
136
/// Cached HIR representations.
///
/// Stored in `Snapshot::hir_cache`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HirCache {
    /// Standard library function HIR, keyed by a string
    /// (presumably the function name — confirm against the builder).
    pub functions: HashMap<String, runmat_hir::HirProgram>,

    /// Common expression patterns.
    pub patterns: Vec<HirPattern>,

    /// Type inference cache mapping a string key to a `runmat_hir::Type`.
    ///
    /// NOTE(review): what the key identifies is not visible in this file.
    pub type_cache: HashMap<String, runmat_hir::Type>,
}
149
/// HIR pattern for common expressions.
///
/// Elements of `HirCache::patterns`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HirPattern {
    /// Name identifying the pattern.
    pub name: String,
    /// The pattern itself, stored as an HIR program.
    pub pattern: runmat_hir::HirProgram,
    /// Frequency figure for the pattern.
    ///
    /// NOTE(review): unit (absolute count vs. relative weight) is not
    /// established in this file.
    pub frequency: u32,
    /// Priority with which this pattern should be optimized.
    pub optimization_priority: OptimizationLevel,
}
158
/// Precompiled bytecode cache.
///
/// Stored in `Snapshot::bytecode_cache`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BytecodeCache {
    /// Standard library bytecode, keyed by a string
    /// (presumably the function name — confirm against the builder).
    pub stdlib_bytecode: HashMap<String, runmat_ignition::Bytecode>,

    /// Common operation bytecode sequences.
    pub operation_sequences: Vec<BytecodeSequence>,

    /// Hotspot bytecode (frequently executed).
    pub hotspots: Vec<HotspotBytecode>,
}
171
/// Bytecode sequence for common operations.
///
/// Elements of `BytecodeCache::operation_sequences`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BytecodeSequence {
    /// Name identifying the sequence.
    pub name: String,
    /// The bytecode for this sequence.
    pub bytecode: runmat_ignition::Bytecode,
    /// Recorded usage count.
    ///
    /// NOTE(review): how and when this is measured is not visible here.
    pub usage_count: u64,
    /// Recorded average execution time of the sequence.
    pub average_execution_time: Duration,
}
180
/// Hotspot bytecode with JIT compilation hints.
///
/// Elements of `BytecodeCache::hotspots`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HotspotBytecode {
    /// Name identifying the hotspot.
    pub name: String,
    /// The bytecode for this hotspot.
    pub bytecode: runmat_ignition::Bytecode,
    /// Recorded execution frequency.
    pub execution_frequency: u64,
    /// Threshold associated with JIT compilation of this hotspot.
    ///
    /// NOTE(review): presumably an execution count after which the JIT kicks
    /// in — confirm against the consumer of this field.
    pub jit_compilation_threshold: u32,
    /// Free-form optimization hints attached to this hotspot.
    pub optimization_hints: Vec<OptimizationHint>,
}
190
/// GC configuration presets.
///
/// Stored in `Snapshot::gc_presets`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GcPresetCache {
    /// Named GC configurations, keyed by preset name.
    pub presets: HashMap<String, runmat_gc::GcConfig>,

    /// Default preset name.
    ///
    /// NOTE(review): nothing in this file checks that the name is actually a
    /// key of `presets`.
    pub default_preset: String,

    /// Performance characteristics for each preset
    /// (presumably keyed by the same preset names as `presets` — confirm).
    pub performance_profiles: HashMap<String, GcPerformanceProfile>,
}
203
/// GC performance profile.
///
/// Values of `GcPresetCache::performance_profiles`.
///
/// NOTE(review): units and scales of the `f64` fields (bytes per second,
/// fraction vs. percentage, ...) are not established in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GcPerformanceProfile {
    /// Average allocation rate.
    pub average_allocation_rate: f64,
    /// Average duration of a collection.
    pub average_collection_time: Duration,
    /// Memory overhead figure.
    pub memory_overhead: f64,
    /// Throughput impact figure.
    pub throughput_impact: f64,
}
212
/// Runtime optimization hints, grouped by kind.
///
/// Stored in `Snapshot::optimization_hints`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationHints {
    /// JIT compilation hints.
    pub jit_hints: Vec<JitHint>,

    /// Memory layout hints.
    pub memory_hints: Vec<MemoryHint>,

    /// Execution pattern hints.
    pub execution_hints: Vec<ExecutionHint>,
}
225
/// JIT compilation hint.
///
/// Elements of `OptimizationHints::jit_hints`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JitHint {
    /// Pattern the hint applies to.
    ///
    /// NOTE(review): the pattern syntax is not defined in this file.
    pub pattern: String,
    /// Kind of JIT optimization being suggested.
    pub hint_type: JitHintType,
    /// Priority of the hint.
    pub priority: OptimizationLevel,
    /// Expected performance gain.
    ///
    /// NOTE(review): scale (ratio vs. percentage) is not established here.
    pub expected_performance_gain: f64,
}
234
/// Types of JIT hints.
///
/// Used as `JitHint::hint_type`. How each variant is acted upon is decided by
/// the consumer of the hints, which is not part of this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum JitHintType {
    InlineCandidate,
    LoopOptimization,
    VectorizeCandidate,
    ConstantFolding,
    DeadCodeElimination,
}
244
/// Memory layout optimization hint.
///
/// Elements of `OptimizationHints::memory_hints`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryHint {
    /// Name of the data structure the hint refers to.
    pub data_structure: String,
    /// Kind of memory optimization being suggested.
    pub hint_type: MemoryHintType,
    /// Alignment value associated with the hint
    /// (presumably in bytes — confirm against the consumer).
    pub alignment: usize,
    /// Access pattern to assume when prefetching.
    pub prefetch_pattern: PrefetchPattern,
}
253
/// Types of memory hints.
///
/// Used as `MemoryHint::hint_type`. How each variant is acted upon is decided
/// by the consumer of the hints, which is not part of this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MemoryHintType {
    CacheLocalityOptimization,
    PrefetchOptimization,
    AlignmentOptimization,
    CompressionCandidate,
}
262
/// Memory prefetch patterns.
///
/// Used as `MemoryHint::prefetch_pattern`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PrefetchPattern {
    Sequential,
    Random,
    /// Strided access; the payload is the stride.
    ///
    /// NOTE(review): whether the stride counts bytes or elements is not
    /// established in this file.
    Strided(usize),
    Hierarchical,
}
271
/// Execution pattern hint.
///
/// Elements of `OptimizationHints::execution_hints`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionHint {
    /// Pattern the hint applies to.
    ///
    /// NOTE(review): the pattern syntax is not defined in this file.
    pub pattern: String,
    /// Kind of execution behavior being described.
    pub hint_type: ExecutionHintType,
    /// Recorded frequency of the pattern.
    pub frequency: u64,
    /// Estimated optimization potential.
    ///
    /// NOTE(review): scale is not established in this file.
    pub optimization_potential: f64,
}
280
/// Types of execution hints.
///
/// Used as `ExecutionHint::hint_type`. How each variant is acted upon is
/// decided by the consumer of the hints, which is not part of this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExecutionHintType {
    HotPath,
    ColdPath,
    BranchPrediction,
    ParallelizationCandidate,
}
289
/// Optimization hint for hotspot bytecode.
///
/// Elements of `HotspotBytecode::optimization_hints`. Unlike the typed hint
/// structs in this file, both the kind and the parameters are plain strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationHint {
    /// Kind of hint, as a free-form string.
    pub hint_type: String,
    /// String key/value parameters for the hint.
    ///
    /// NOTE(review): the accepted keys are not defined in this file.
    pub parameters: HashMap<String, String>,
    /// Expected speedup.
    ///
    /// NOTE(review): scale (ratio vs. percentage) is not established here.
    pub expected_speedup: f64,
}
297
/// Snapshot loading statistics.
///
/// One record is produced per load and kept by `SnapshotManager`, keyed by the
/// snapshot path. The two helper methods derive ratios from the raw fields and
/// always return a finite number, even for an empty or instantaneous load.
#[derive(Debug, Clone)]
pub struct LoadingStats {
    /// Time spent loading; the denominator of [`LoadingStats::loading_throughput`].
    ///
    /// NOTE(review): whether this includes the other phase timings below is
    /// decided by the loader, not by this type.
    pub load_time: Duration,
    /// Time spent decompressing.
    pub decompression_time: Duration,
    /// Time spent validating.
    pub validation_time: Duration,
    /// Time spent initializing.
    pub initialization_time: Duration,
    /// Total (uncompressed) size, treated as bytes by the helper methods.
    pub total_size: usize,
    /// Compressed size, in the same unit as `total_size`.
    pub compressed_size: usize,
    /// Compression ratio as reported by the producer of the stats.
    ///
    /// NOTE(review): its exact definition is not established in this file and
    /// it is not used by the helper methods.
    pub compression_ratio: f64,
    /// Number of builtins in the loaded snapshot.
    pub builtin_count: usize,
    /// Cache hit rate as reported by the producer of the stats.
    pub cache_hit_rate: f64,
}

impl LoadingStats {
    /// Fraction of the uncompressed size saved by compression:
    /// `1 - compressed_size / total_size`.
    ///
    /// The result is negative when the compressed form is larger than the
    /// original. Returns `0.0` when `total_size` is zero, because an empty
    /// payload has nothing to save and the plain division would yield
    /// NaN or negative infinity.
    pub fn compression_efficiency(&self) -> f64 {
        if self.total_size == 0 {
            return 0.0;
        }
        1.0 - (self.compressed_size as f64 / self.total_size as f64)
    }

    /// Loading throughput in size units (bytes) per second:
    /// `total_size / load_time`.
    ///
    /// Returns `0.0` when `load_time` is zero, because no meaningful rate can
    /// be derived and the plain division would yield NaN or infinity.
    pub fn loading_throughput(&self) -> f64 {
        let seconds = self.load_time.as_secs_f64();
        if seconds > 0.0 {
            self.total_size as f64 / seconds
        } else {
            0.0
        }
    }
}
321
/// Error types for snapshot operations.
///
/// `Display` messages come from the `#[error(...)]` attributes. The two
/// `#[from]` variants also get `From` conversions, so `?` can be applied
/// directly to `std::io::Error` and `bincode::Error` values in functions that
/// return `SnapshotResult`.
#[derive(thiserror::Error, Debug)]
pub enum SnapshotError {
    /// Wraps an underlying I/O failure.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps a bincode (de)serialization failure.
    #[error("Serialization error: {0}")]
    Serialization(#[from] bincode::Error),

    /// Compression or decompression failed; `message` carries the details.
    #[error("Compression error: {message}")]
    Compression { message: String },

    /// Validation failed; `message` carries the details.
    #[error("Validation error: {message}")]
    Validation { message: String },

    /// The version found does not match the version expected.
    #[error("Version mismatch: expected {expected}, found {found}")]
    VersionMismatch { expected: String, found: String },

    /// The snapshot data is corrupted; `reason` says why.
    #[error("Corrupted snapshot: {reason}")]
    Corrupted { reason: String },

    /// The supplied configuration is invalid; `message` carries the details.
    #[error("Configuration error: {message}")]
    Configuration { message: String },
}
346
/// Result type for snapshot operations.
///
/// Shorthand for a `Result` whose error type is fixed to [`SnapshotError`].
pub type SnapshotResult<T> = std::result::Result<T, SnapshotError>;
349
/// Snapshot configuration.
///
/// Cloned by `SnapshotManager` and handed to `SnapshotBuilder::new` and
/// `SnapshotLoader::new`. How each option is honored is decided by those
/// types; nothing in this file validates the values.
#[derive(Debug, Clone)]
pub struct SnapshotConfig {
    /// Enable compression.
    pub compression_enabled: bool,

    /// Compression algorithm.
    pub compression_algorithm: CompressionAlgorithm,

    /// Compression level (1-9).
    ///
    /// NOTE(review): the range is stated here only; this file does not clamp
    /// or check it.
    pub compression_level: u32,

    /// Enable validation.
    pub validation_enabled: bool,

    /// Use memory mapping for loading.
    pub memory_mapping_enabled: bool,

    /// Enable parallel loading.
    pub parallel_loading: bool,

    /// Enable progress reporting.
    pub progress_reporting: bool,

    /// Maximum cache size (the default is written as a byte count).
    ///
    /// NOTE(review): `SnapshotManager` in this file does not consult this
    /// limit when inserting into its cache.
    pub max_cache_size: usize,

    /// Cache eviction policy.
    ///
    /// NOTE(review): `SnapshotManager` in this file never evicts entries.
    pub cache_eviction_policy: CacheEvictionPolicy,
}
380
/// Compression algorithm options.
///
/// Used as `SnapshotConfig::compression_algorithm`.
#[derive(Debug, Clone)]
pub enum CompressionAlgorithm {
    /// No compression algorithm.
    None,
    /// LZ4.
    Lz4,
    /// Zstandard.
    Zstd,
    /// Choose best based on data characteristics.
    ///
    /// NOTE(review): the selection logic is not part of this file.
    Auto,
}
389
/// Cache eviction policies.
///
/// Used as `SnapshotConfig::cache_eviction_policy`. The implementation of each
/// policy is not part of this file.
#[derive(Debug, Clone)]
pub enum CacheEvictionPolicy {
    LeastRecentlyUsed,
    LeastFrequentlyUsed,
    /// Time-based policy; the payload is the time-to-live duration.
    TimeToLive(Duration),
    Adaptive,
}
398
399impl Default for SnapshotConfig {
400    fn default() -> Self {
401        Self {
402            compression_enabled: true,
403            compression_algorithm: CompressionAlgorithm::Auto,
404            compression_level: 6,
405            validation_enabled: true,
406            memory_mapping_enabled: true,
407            parallel_loading: true,
408            progress_reporting: false,
409            max_cache_size: 128 * 1024 * 1024, // 128MB
410            cache_eviction_policy: CacheEvictionPolicy::Adaptive,
411        }
412    }
413}
414
/// Main snapshot interface.
///
/// Creates snapshots through `SnapshotBuilder`, loads them through
/// `SnapshotLoader`, and remembers every loaded snapshot together with its
/// loading statistics. Both maps sit behind their own `Arc<RwLock<..>>`, and
/// all methods take `&self`.
pub struct SnapshotManager {
    /// Configuration cloned into every builder and loader this manager creates.
    config: SnapshotConfig,
    /// Loaded snapshots, keyed by the path exactly as it was passed in
    /// (paths are not canonicalized before lookup).
    cache: Arc<RwLock<HashMap<PathBuf, Arc<Snapshot>>>>,
    /// Loading statistics for each cached snapshot, keyed by the same path.
    stats: Arc<RwLock<HashMap<PathBuf, LoadingStats>>>,
}
421
422impl SnapshotManager {
423    /// Create a new snapshot manager
424    pub fn new(config: SnapshotConfig) -> Self {
425        Self {
426            config,
427            cache: Arc::new(RwLock::new(HashMap::new())),
428            stats: Arc::new(RwLock::new(HashMap::new())),
429        }
430    }
431
432    /// Create a snapshot from the current standard library
433    pub fn create_snapshot<P: AsRef<Path>>(&self, output_path: P) -> SnapshotResult<()> {
434        let builder = SnapshotBuilder::new(self.config.clone());
435        builder.build_and_save(output_path)
436    }
437
438    /// Load a snapshot from disk
439    pub fn load_snapshot<P: AsRef<Path>>(&self, snapshot_path: P) -> SnapshotResult<Arc<Snapshot>> {
440        let path = snapshot_path.as_ref().to_path_buf();
441
442        // Check cache first
443        {
444            let cache = self.cache.read();
445            if let Some(snapshot) = cache.get(&path) {
446                return Ok(Arc::clone(snapshot));
447            }
448        }
449
450        // Load from disk
451        let mut loader = SnapshotLoader::new(self.config.clone());
452        let (snapshot, stats) = loader.load(&path)?;
453        let snapshot = Arc::new(snapshot);
454
455        // Update cache and stats
456        {
457            let mut cache = self.cache.write();
458            cache.insert(path.clone(), Arc::clone(&snapshot));
459        }
460        {
461            let mut stats_map = self.stats.write();
462            stats_map.insert(path, stats);
463        }
464
465        Ok(snapshot)
466    }
467
468    /// Get loading statistics for a snapshot
469    pub fn get_stats<P: AsRef<Path>>(&self, snapshot_path: P) -> Option<LoadingStats> {
470        let stats = self.stats.read();
471        stats.get(snapshot_path.as_ref()).cloned()
472    }
473
474    /// Clear snapshot cache
475    pub fn clear_cache(&self) {
476        let mut cache = self.cache.write();
477        cache.clear();
478        let mut stats = self.stats.write();
479        stats.clear();
480    }
481
482    /// Get cache statistics
483    pub fn cache_stats(&self) -> (usize, usize) {
484        let cache = self.cache.read();
485        let total_size = cache
486            .values()
487            .map(|snapshot| bincode::serialized_size(&**snapshot).unwrap_or(0) as usize)
488            .sum();
489        (cache.len(), total_size)
490    }
491}
492
493impl Default for SnapshotManager {
494    fn default() -> Self {
495        Self::new(SnapshotConfig::default())
496    }
497}
498
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing computed `f64` statistics.
    /// Exact `==` on floating-point results is fragile, so the assertions
    /// below check closeness instead.
    const EPSILON: f64 = 1e-9;

    /// The default configuration enables the main features and uses the
    /// documented default values.
    #[test]
    fn test_snapshot_config_default() {
        let config = SnapshotConfig::default();
        assert!(config.compression_enabled);
        assert!(config.validation_enabled);
        assert!(config.memory_mapping_enabled);
        assert!(config.parallel_loading);
        assert!(!config.progress_reporting);
        assert_eq!(config.compression_level, 6);
        assert_eq!(config.max_cache_size, 128 * 1024 * 1024);
        assert!(matches!(
            config.compression_algorithm,
            CompressionAlgorithm::Auto
        ));
        assert!(matches!(
            config.cache_eviction_policy,
            CacheEvictionPolicy::Adaptive
        ));
    }

    /// A freshly created manager has nothing cached.
    #[test]
    fn test_snapshot_manager_creation() {
        let manager = SnapshotManager::default();
        let (cache_entries, cache_size) = manager.cache_stats();
        assert_eq!(cache_entries, 0);
        assert_eq!(cache_size, 0);
    }

    /// The derived ratios follow from the raw size and timing fields.
    #[test]
    fn test_loading_stats_calculations() {
        let stats = LoadingStats {
            load_time: Duration::from_millis(100),
            decompression_time: Duration::from_millis(20),
            validation_time: Duration::from_millis(10),
            initialization_time: Duration::from_millis(5),
            total_size: 1000,
            compressed_size: 600,
            compression_ratio: 0.4,
            builtin_count: 50,
            cache_hit_rate: 0.8,
        };

        // 1 - 600 / 1000
        assert!((stats.compression_efficiency() - 0.4).abs() < EPSILON);
        // 1000 bytes in 0.1 s => 10000 bytes per second
        assert!((stats.loading_throughput() - 10000.0).abs() < EPSILON);
    }
}