runmat_snapshot/
loader.rs

1//! High-performance snapshot loader with memory mapping and caching
2//!
3//! Optimized for fast startup times with parallel loading, compression,
4//! and integration with the RunMat runtime.
5
6use std::fs::File;
7use std::io::{BufReader, Read, Seek, SeekFrom};
8use std::path::Path;
9use std::sync::Arc;
10use std::time::{Duration, Instant};
11
12use crate::compression::CompressionConfig;
13use anyhow::Context;
14use memmap2::Mmap;
15use parking_lot::RwLock;
16
17use crate::compression::CompressionEngine;
18use crate::format::*;
19use crate::validation::{SnapshotValidator, ValidationConfig};
20use crate::{LoadingStats, Snapshot, SnapshotConfig, SnapshotError, SnapshotResult};
21
/// High-performance snapshot loader
///
/// Owns everything needed to turn a snapshot file on disk into an in-memory
/// `Snapshot`: the loading configuration, a decompression engine, an optional
/// validator, and the statistics gathered while loading.
pub struct SnapshotLoader {
    /// Configuration
    ///
    /// Consulted during loading for `memory_mapping_enabled` and
    /// `validation_enabled`; a clone is handed to each `FormatLoader`.
    config: SnapshotConfig,

    /// Compression engine for decompression
    ///
    /// Used by the synchronous load path to decompress the data section when
    /// the header reports an algorithm other than `CompressionAlgorithm::None`.
    compression: CompressionEngine,

    /// Validator for integrity checks
    ///
    /// Only present when the crate is built with the `validation` feature.
    #[cfg(feature = "validation")]
    validator: SnapshotValidator,

    /// Memory-mapped file cache
    ///
    /// Created empty in `new` and emptied by `clear_cache`.
    /// NOTE(review): no method visible in this file inserts into the cache.
    mmap_cache: Arc<RwLock<Vec<Mmap>>>,

    /// Loading statistics
    ///
    /// Updated in place by the individual loading steps; `load` and
    /// `load_async` return a clone of it alongside the snapshot.
    stats: LoadingStats,
}
40
/// Loader for specific snapshot format
///
/// Bundles one opened snapshot file with its (optional) memory mapping and
/// parsed header, and knows how to read the header and the data section.
struct FormatLoader {
    /// File handle
    ///
    /// Used for buffered reads whenever no memory mapping is available.
    file: File,

    /// Memory mapping (if enabled)
    ///
    /// `None` when mapping was not requested or could not be created.
    mmap: Option<Mmap>,

    /// Header information
    ///
    /// Callers construct the loader with a placeholder header and then
    /// overwrite it with the result of `read_header`.
    header: SnapshotHeader,

    /// Configuration
    ///
    /// `read_header` reads `memory_mapping_enabled` and `validation_enabled`
    /// from it.
    config: SnapshotConfig,
}
55
56impl SnapshotLoader {
57    /// Create a new snapshot loader
58    pub fn new(config: SnapshotConfig) -> Self {
59        let compression = CompressionEngine::new(crate::compression::CompressionConfig {
60            adaptive_selection: false, // Decompression doesn't need adaptation
61            prefer_speed: true,
62            ..Default::default()
63        });
64
65        #[cfg(feature = "validation")]
66        let validator = SnapshotValidator::with_config(ValidationConfig {
67            strict_mode: false, // Don't fail on warnings during loading
68            ..ValidationConfig::default()
69        });
70
71        Self {
72            config,
73            compression,
74            #[cfg(feature = "validation")]
75            validator,
76            mmap_cache: Arc::new(RwLock::new(Vec::new())),
77            stats: LoadingStats {
78                load_time: Duration::ZERO,
79                decompression_time: Duration::ZERO,
80                validation_time: Duration::ZERO,
81                initialization_time: Duration::ZERO,
82                total_size: 0,
83                compressed_size: 0,
84                compression_ratio: 1.0,
85                builtin_count: 0,
86                cache_hit_rate: 0.0,
87            },
88        }
89    }
90
91    /// Load snapshot from file
92    pub fn load<P: AsRef<Path>>(&mut self, path: P) -> SnapshotResult<(Snapshot, LoadingStats)> {
93        let start_time = Instant::now();
94        log::info!("Loading snapshot from {}", path.as_ref().display());
95
96        // Open and validate file
97        let format_loader = self.open_snapshot_file(path.as_ref())?;
98
99        // Load and decompress data
100        let data = self.load_snapshot_data(&format_loader)?;
101
102        // Deserialize snapshot
103        let snapshot = self.deserialize_snapshot(&data)?;
104
105        // Validate snapshot if enabled
106        #[cfg(feature = "validation")]
107        if self.config.validation_enabled {
108            self.validate_snapshot(&snapshot)?;
109        }
110
111        // Initialize runtime integration
112        self.initialize_runtime_integration(&snapshot)?;
113
114        self.stats.load_time = start_time.elapsed();
115        log::info!("Snapshot loaded successfully in {:?}", self.stats.load_time);
116
117        Ok((snapshot, self.stats.clone()))
118    }
119
120    /// Load snapshot asynchronously with true async I/O for non-blocking startup
121    pub async fn load_async<P: AsRef<Path>>(
122        &mut self,
123        path: P,
124    ) -> SnapshotResult<(Snapshot, LoadingStats)> {
125        let start_time = Instant::now();
126        let path = path.as_ref();
127
128        // Async file opening and validation
129        let file = tokio::fs::File::open(path)
130            .await
131            .with_context(|| format!("Failed to open snapshot file: {}", path.display()))
132            .map_err(|e| SnapshotError::Configuration {
133                message: e.to_string(),
134            })?;
135
136        // Get file metadata asynchronously
137        let metadata = file.metadata().await.map_err(SnapshotError::Io)?;
138        let file_size = metadata.len() as usize;
139        self.stats.total_size = file_size;
140
141        // Read entire file asynchronously
142        let mut file_contents = Vec::with_capacity(file_size);
143        let mut reader = tokio::io::BufReader::new(file);
144        use tokio::io::AsyncReadExt;
145        reader
146            .read_to_end(&mut file_contents)
147            .await
148            .map_err(SnapshotError::Io)?;
149
150        // Validate file format
151        if file_contents.len() < std::mem::size_of::<SnapshotHeader>() {
152            return Err(SnapshotError::Io(std::io::Error::new(
153                std::io::ErrorKind::InvalidData,
154                "File too small to contain valid snapshot header",
155            )));
156        }
157
158        // Parse header
159        let header_size =
160            bincode::serialized_size(&SnapshotHeader::new(SnapshotMetadata::current()))
161                .map_err(SnapshotError::Serialization)? as usize;
162        let header: SnapshotHeader = bincode::deserialize(
163            &file_contents[..header_size.min(file_contents.len())],
164        )
165        .map_err(|e| SnapshotError::Configuration {
166            message: format!("Failed to deserialize snapshot header: {e}"),
167        })?;
168
169        // Validate header
170        header.validate()?;
171
172        // Extract and decompress data
173        let data_start = header.data_info.data_offset as usize;
174        let data_end = data_start + header.data_info.compressed_size;
175
176        if data_end > file_contents.len() {
177            return Err(SnapshotError::Io(std::io::Error::new(
178                std::io::ErrorKind::InvalidData,
179                "Data section extends beyond file size",
180            )));
181        }
182
183        let compressed_data = &file_contents[data_start..data_end];
184        self.stats.compressed_size = compressed_data.len();
185
186        // Decompress data if needed
187        let decompressed_data =
188            if header.data_info.compression.algorithm != CompressionAlgorithm::None {
189                let compression_engine = CompressionEngine::new(CompressionConfig::default());
190                compression_engine.decompress(compressed_data, &header.data_info.compression)?
191            } else {
192                compressed_data.to_vec()
193            };
194
195        // Deserialize snapshot
196        let snapshot: Snapshot =
197            bincode::deserialize(&decompressed_data).map_err(|e| SnapshotError::Configuration {
198                message: format!("Failed to deserialize snapshot data: {e}"),
199            })?;
200
201        // Validate snapshot if enabled
202        if self.config.validation_enabled {
203            // Skip detailed validation for async loading (simplified)
204            // In production, this would validate the snapshot structure
205        }
206
207        // Update stats
208        let load_time = start_time.elapsed();
209        self.stats.load_time = load_time;
210        self.stats.builtin_count = snapshot.builtins.functions.len();
211
212        Ok((snapshot, self.stats.clone()))
213    }
214
215    /// Open and validate snapshot file
216    fn open_snapshot_file(&mut self, path: &Path) -> SnapshotResult<FormatLoader> {
217        let start = Instant::now();
218
219        // Open file
220        let file = File::open(path)
221            .with_context(|| format!("Failed to open snapshot file: {}", path.display()))
222            .map_err(|e| crate::SnapshotError::Configuration {
223                message: e.to_string(),
224            })?;
225
226        // Get file metadata
227        let metadata = file.metadata()?;
228        let file_size = metadata.len() as usize;
229        self.stats.total_size = file_size;
230
231        // Create memory mapping if enabled and file is large enough
232        let mmap = if self.config.memory_mapping_enabled && file_size > 4096 {
233            match unsafe { Mmap::map(&file) } {
234                Ok(mmap) => {
235                    log::debug!("Created memory mapping for snapshot file ({file_size} bytes)");
236                    Some(mmap)
237                }
238                Err(e) => {
239                    log::warn!("Failed to create memory mapping, falling back to regular I/O: {e}");
240                    None
241                }
242            }
243        } else {
244            None
245        };
246
247        // Read and validate header
248        let mut format_loader = FormatLoader {
249            file,
250            mmap,
251            header: SnapshotHeader::new(SnapshotMetadata::current()), // Temporary
252            config: self.config.clone(),
253        };
254
255        format_loader.header = format_loader.read_header()?;
256        format_loader.header.validate()?;
257
258        // Update stats
259        self.stats.compressed_size = format_loader.header.data_info.compressed_size;
260        self.stats.compression_ratio = format_loader.header.data_info.compressed_size as f64
261            / format_loader.header.data_info.uncompressed_size as f64;
262
263        let load_time = start.elapsed();
264        log::debug!("File opened and header validated in {load_time:?}");
265
266        Ok(format_loader)
267    }
268
269    /// Load and decompress snapshot data
270    fn load_snapshot_data(&mut self, format_loader: &FormatLoader) -> SnapshotResult<Vec<u8>> {
271        let start = Instant::now();
272
273        // Read compressed data
274        let compressed_data = format_loader.read_data_section()?;
275
276        // Decompress if needed
277        let decompression_start = Instant::now();
278        let data = if matches!(
279            format_loader.header.data_info.compression.algorithm,
280            CompressionAlgorithm::None
281        ) {
282            compressed_data
283        } else {
284            self.compression.decompress(
285                &compressed_data,
286                &format_loader.header.data_info.compression,
287            )?
288        };
289        self.stats.decompression_time = decompression_start.elapsed();
290
291        let load_time = start.elapsed();
292        log::debug!(
293            "Data loaded and decompressed in {:?} (decompression: {:?})",
294            load_time,
295            self.stats.decompression_time
296        );
297
298        Ok(data)
299    }
300
301    /// Deserialize snapshot from data
302    fn deserialize_snapshot(&mut self, data: &[u8]) -> SnapshotResult<Snapshot> {
303        let start = Instant::now();
304
305        let snapshot: Snapshot = bincode::deserialize(data)
306            .context("Failed to deserialize snapshot data")
307            .map_err(|e| crate::SnapshotError::Configuration {
308                message: e.to_string(),
309            })?;
310
311        // Update stats
312        self.stats.builtin_count = snapshot.builtins.functions.len();
313
314        let deserialize_time = start.elapsed();
315        log::debug!("Snapshot deserialized in {deserialize_time:?}");
316
317        Ok(snapshot)
318    }
319
320    /// Validate loaded snapshot
321    #[cfg(feature = "validation")]
322    fn validate_snapshot(&mut self, snapshot: &Snapshot) -> SnapshotResult<()> {
323        let start = Instant::now();
324
325        // Validate content
326        let content_result = self.validator.validate_content(snapshot)?;
327        if !content_result.is_ok() {
328            if self.config.validation_enabled {
329                return Err(SnapshotError::Validation {
330                    message: "Snapshot content validation failed".to_string(),
331                });
332            } else {
333                log::warn!("Snapshot content validation failed, but continuing");
334            }
335        }
336
337        // Validate compatibility
338        let compat_result = self.validator.validate_compatibility(snapshot)?;
339        if !compat_result.is_ok() {
340            log::warn!("Snapshot compatibility issues detected");
341            for warning in compat_result.warnings {
342                log::warn!("Compatibility: {}", warning.message);
343            }
344        }
345
346        self.stats.validation_time = start.elapsed();
347        log::debug!("Snapshot validated in {:?}", self.stats.validation_time);
348
349        Ok(())
350    }
351
352    /// Initialize runtime integration
353    fn initialize_runtime_integration(&mut self, snapshot: &Snapshot) -> SnapshotResult<()> {
354        let start = Instant::now();
355
356        // Initialize builtin dispatch table
357        self.initialize_builtin_dispatch(&snapshot.builtins)?;
358
359        // Apply optimization hints
360        self.apply_optimization_hints(&snapshot.optimization_hints)?;
361
362        // Configure GC with presets
363        self.configure_gc(&snapshot.gc_presets)?;
364
365        self.stats.initialization_time = start.elapsed();
366        log::debug!(
367            "Runtime integration initialized in {:?}",
368            self.stats.initialization_time
369        );
370
371        Ok(())
372    }
373
374    /// Initialize builtin function dispatch table
375    fn initialize_builtin_dispatch(&self, registry: &crate::BuiltinRegistry) -> SnapshotResult<()> {
376        // Get current builtins from runtime
377        let current_builtins = runmat_builtins::builtin_functions();
378        let mut dispatch_table = Vec::with_capacity(registry.functions.len());
379
380        // Build dispatch table by matching names
381        for function_meta in &registry.functions {
382            if let Some(builtin) = current_builtins
383                .iter()
384                .find(|b| b.name == function_meta.name)
385            {
386                dispatch_table.push(builtin.implementation);
387            } else {
388                log::warn!(
389                    "Builtin function '{}' not found in current runtime",
390                    function_meta.name
391                );
392                // Use a placeholder function that returns an error
393                dispatch_table
394                    .push(|_args| Err("Function not available in current runtime".to_string()));
395            }
396        }
397
398        // Update the registry's dispatch table
399        {
400            let mut table = registry.dispatch_table.write();
401            *table = dispatch_table;
402        }
403
404        log::debug!(
405            "Initialized dispatch table with {} functions",
406            registry.functions.len()
407        );
408        Ok(())
409    }
410
411    /// Apply optimization hints to runtime
412    fn apply_optimization_hints(&self, hints: &crate::OptimizationHints) -> SnapshotResult<()> {
413        // Apply JIT hints
414        for hint in &hints.jit_hints {
415            log::debug!(
416                "JIT hint: {} ({:?}) - expected gain: {:.1}x",
417                hint.pattern,
418                hint.hint_type,
419                hint.expected_performance_gain
420            );
421            // In a full implementation, these would be passed to the JIT compiler
422        }
423
424        // Apply memory hints
425        for hint in &hints.memory_hints {
426            log::debug!(
427                "Memory hint: {} ({:?}) - alignment: {}",
428                hint.data_structure,
429                hint.hint_type,
430                hint.alignment
431            );
432            // In a full implementation, these would configure memory layout
433        }
434
435        // Apply execution hints
436        for hint in &hints.execution_hints {
437            log::debug!(
438                "Execution hint: {} ({:?}) - frequency: {}",
439                hint.pattern,
440                hint.hint_type,
441                hint.frequency
442            );
443            // In a full implementation, these would configure execution strategies
444        }
445
446        Ok(())
447    }
448
449    /// Configure GC with snapshot presets
450    fn configure_gc(&self, presets: &crate::GcPresetCache) -> SnapshotResult<()> {
451        if let Some(default_config) = presets.presets.get(&presets.default_preset) {
452            match runmat_gc::gc_configure(default_config.clone()) {
453                Ok(_) => {
454                    log::debug!("GC configured with preset '{}'", presets.default_preset);
455                }
456                Err(e) => {
457                    log::warn!("Failed to configure GC with snapshot preset: {e}");
458                }
459            }
460        }
461
462        Ok(())
463    }
464
    /// Get loading statistics
    ///
    /// Returns a borrow of the loader's statistics as last written by the
    /// loading steps. A freshly constructed loader reports the zeroed values
    /// set up in `new`.
    pub fn stats(&self) -> &LoadingStats {
        &self.stats
    }
469
470    /// Clear memory-mapped file cache
471    pub fn clear_cache(&mut self) {
472        let mut cache = self.mmap_cache.write();
473        cache.clear();
474        log::debug!("Memory mapping cache cleared");
475    }
476}
477
478impl FormatLoader {
479    /// Read snapshot header from file
480    fn read_header(&mut self) -> SnapshotResult<SnapshotHeader> {
481        // Check configuration for memory mapping preference and validation
482        let use_mmap = self.config.memory_mapping_enabled;
483        let validate_data = self.config.validation_enabled;
484
485        if use_mmap && self.mmap.is_some() {
486            // Use memory mapping
487            let mmap_data = self.mmap.as_ref().unwrap();
488
489            // Read header size (4 bytes, little-endian)
490            if mmap_data.len() < 4 {
491                return Err(crate::SnapshotError::Io(std::io::Error::new(
492                    std::io::ErrorKind::UnexpectedEof,
493                    "File too small to contain header size",
494                )));
495            }
496
497            let header_size =
498                u32::from_le_bytes([mmap_data[0], mmap_data[1], mmap_data[2], mmap_data[3]])
499                    as usize;
500
501            // Read header data
502            if mmap_data.len() < 4 + header_size {
503                return Err(crate::SnapshotError::Io(std::io::Error::new(
504                    std::io::ErrorKind::UnexpectedEof,
505                    "File too small to contain header",
506                )));
507            }
508
509            let header_data = &mmap_data[4..4 + header_size];
510            let header: SnapshotHeader = bincode::deserialize(header_data)
511                .context("Failed to deserialize header from memory map")
512                .map_err(|e| crate::SnapshotError::Configuration {
513                    message: e.to_string(),
514                })?;
515
516            // Validate header if configuration requires it
517            if validate_data {
518                header.validate()?;
519            }
520            Ok(header)
521        } else {
522            // Use regular file I/O
523            let mut reader = BufReader::new(&self.file);
524            reader.seek(SeekFrom::Start(0))?;
525
526            // Read header size (4 bytes, little-endian)
527            let mut size_buffer = [0u8; 4];
528            reader.read_exact(&mut size_buffer)?;
529            let header_size = u32::from_le_bytes(size_buffer) as usize;
530
531            // Read header data
532            let mut header_buffer = vec![0u8; header_size];
533            reader.read_exact(&mut header_buffer)?;
534
535            let header: SnapshotHeader = bincode::deserialize(&header_buffer)
536                .context("Failed to deserialize header")
537                .map_err(|e| crate::SnapshotError::Configuration {
538                    message: e.to_string(),
539                })?;
540
541            // Validate header if configuration requires it
542            if validate_data {
543                header.validate()?;
544            }
545            Ok(header)
546        }
547    }
548
549    /// Read data section from file
550    fn read_data_section(&self) -> SnapshotResult<Vec<u8>> {
551        if let Some(ref mmap) = self.mmap {
552            // Use memory mapping
553            // Account for 4-byte header size prefix + actual header size
554            let header_size = bincode::serialized_size(&self.header)? as usize;
555            let data_start = 4 + header_size; // 4 bytes for size + header
556            let data_end = data_start + self.header.data_info.compressed_size;
557
558            if data_end > mmap.len() {
559                return Err(SnapshotError::Corrupted {
560                    reason: "Data section extends beyond file".to_string(),
561                });
562            }
563
564            Ok(mmap[data_start..data_end].to_vec())
565        } else {
566            // Use regular file I/O
567            let file = &self.file;
568            // Account for 4-byte header size prefix + actual header size
569            let header_size = bincode::serialized_size(&self.header)? as u64;
570            let data_start = 4 + header_size; // 4 bytes for size + header
571            let mut reader = BufReader::new(file);
572
573            reader.seek(SeekFrom::Start(data_start))?;
574
575            let mut data = vec![0u8; self.header.data_info.compressed_size];
576            reader.read_exact(&mut data)?;
577
578            Ok(data)
579        }
580    }
581}
582
583/// Utility functions for snapshot loading
584impl SnapshotLoader {
585    /// Preload snapshot header for quick validation
586    pub fn peek_header<P: AsRef<Path>>(path: P) -> SnapshotResult<SnapshotHeader> {
587        let file = File::open(path.as_ref())
588            .with_context(|| format!("Failed to open snapshot file: {}", path.as_ref().display()))
589            .map_err(|e| crate::SnapshotError::Configuration {
590                message: e.to_string(),
591            })?;
592
593        let mut format_loader = FormatLoader {
594            file,
595            mmap: None,
596            header: SnapshotHeader::new(SnapshotMetadata::current()),
597            config: SnapshotConfig::default(),
598        };
599
600        format_loader.read_header()
601    }
602
603    /// Check if snapshot file is valid without full loading
604    pub fn quick_validate<P: AsRef<Path>>(path: P) -> SnapshotResult<bool> {
605        match Self::peek_header(path) {
606            Ok(header) => Ok(header.validate().is_ok()),
607            Err(_) => Ok(false),
608        }
609    }
610
611    /// Get snapshot metadata without loading content
612    pub fn get_metadata<P: AsRef<Path>>(path: P) -> SnapshotResult<SnapshotMetadata> {
613        let header = Self::peek_header(path)?;
614        Ok(header.metadata)
615    }
616
617    /// Estimate loading time based on snapshot header
618    pub fn estimate_load_time<P: AsRef<Path>>(path: P) -> SnapshotResult<Duration> {
619        let header = Self::peek_header(path)?;
620        Ok(header.estimated_load_time())
621    }
622}
623
#[cfg(test)]
mod tests {
    use super::*;

    /// A path the tests expect not to exist in the working directory.
    const MISSING_SNAPSHOT: &str = "nonexistent.snapshot";

    /// A new loader starts with a zero load time.
    #[test]
    fn test_loader_creation() {
        let loader = SnapshotLoader::new(SnapshotConfig::default());
        assert_eq!(loader.stats.load_time, Duration::ZERO);
    }

    /// A missing file is reported as "not valid", not as an error.
    #[test]
    fn test_quick_validate_nonexistent() {
        let valid = SnapshotLoader::quick_validate(MISSING_SNAPSHOT).unwrap_or(true);
        assert!(!valid);
    }

    /// Peeking at a missing file fails; the success path needs a real snapshot.
    #[test]
    fn test_header_peek() {
        let outcome = SnapshotLoader::peek_header(MISSING_SNAPSHOT);
        assert!(outcome.is_err());
    }

    /// Metadata extraction on a missing file fails; the success path needs a
    /// real snapshot.
    #[test]
    fn test_metadata_extraction() {
        let outcome = SnapshotLoader::get_metadata(MISSING_SNAPSHOT);
        assert!(outcome.is_err());
    }
}