runmat_snapshot/
format.rs

1//! Snapshot file format and serialization
2//!
3//! High-performance binary format optimized for fast loading and validation.
4//! Uses a structured layout with versioning and integrity checks.
5
6use std::time::{Duration, SystemTime};
7
8use serde::{Deserialize, Serialize};
9
/// Magic bytes identifying a snapshot file: ASCII `RUSTMAT` followed by the
/// byte `0x01` (8 bytes total). [`SnapshotHeader::validate`] rejects any header
/// whose `magic` field differs from this value.
pub const SNAPSHOT_MAGIC: &[u8; 8] = b"RUSTMAT\x01";

/// Current snapshot format version. [`SnapshotHeader::validate`] rejects
/// headers whose `version` is greater than this value.
pub const SNAPSHOT_VERSION: u32 = 1;
15
/// Snapshot file format structure: a header, the snapshot payload, and an
/// optional checksum over that payload.
///
/// Unlike [`SnapshotHeader`], this type does not derive `Serialize` /
/// `Deserialize`.
#[derive(Debug, Clone)]
pub struct SnapshotFormat {
    /// File header
    pub header: SnapshotHeader,

    /// Compressed snapshot data. These are the bytes hashed by
    /// `with_checksum` and `validate_checksum`.
    pub data: Vec<u8>,

    /// Optional integrity checksum of `data`; `None` until `with_checksum`
    /// has been called (see `SnapshotFormat::new`).
    pub checksum: Option<Vec<u8>>,
}
28
/// Snapshot file header.
///
/// Carries the identification fields checked by `validate`, the creation
/// metadata, and the descriptors of the data and checksum sections.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotHeader {
    /// Magic number for format identification; must equal [`SNAPSHOT_MAGIC`]
    /// for `validate` to succeed.
    pub magic: [u8; 8],

    /// Format version; `validate` fails when this exceeds
    /// [`SNAPSHOT_VERSION`].
    pub version: u32,

    /// Snapshot metadata
    pub metadata: SnapshotMetadata,

    /// Data section info
    pub data_info: DataSectionInfo,

    /// Checksum info (if enabled); `None` in a freshly created header.
    pub checksum_info: Option<ChecksumInfo>,

    /// Header size (for format evolution). `SnapshotHeader::new` sets this to
    /// `0`; it is expected to be filled in during serialization.
    pub header_size: u32,
}
50
/// Snapshot metadata: when, by what, and for which platform a snapshot was
/// created.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotMetadata {
    /// Creation timestamp; `age` is measured from this instant.
    pub created_at: SystemTime,

    /// RunMat version used to create snapshot. `is_compatible` compares the
    /// text before the first `.` with that of the current crate version.
    pub runmat_version: String,

    /// Snapshot creation tool version
    pub tool_version: String,

    /// Build configuration used
    pub build_config: BuildConfig,

    /// Performance characteristics
    pub performance_metrics: PerformanceMetrics,

    /// Feature flags enabled during creation
    pub feature_flags: Vec<String>,

    /// Target platform information
    pub target_platform: PlatformInfo,
}
75
/// Build configuration recorded alongside a snapshot.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuildConfig {
    /// Optimization level used; `BuildConfig::current` records either
    /// `"debug"` or `"release"`.
    pub optimization_level: String,

    /// Debug information included
    pub debug_info: bool,

    /// Compiler used
    pub compiler: String,

    /// Compilation flags; left empty by `BuildConfig::current`.
    pub compile_flags: Vec<String>,

    /// Features enabled; left empty by `BuildConfig::current`.
    pub enabled_features: Vec<String>,
}
94
/// Performance metrics from snapshot creation.
///
/// The `Default` implementation zeroes every field except
/// `compression_ratio`, which starts at `1.0`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Time to create snapshot
    pub creation_time: Duration,

    /// Number of builtins captured
    pub builtin_count: usize,

    /// HIR cache entries
    pub hir_cache_entries: usize,

    /// Bytecode cache entries
    pub bytecode_cache_entries: usize,

    /// Total uncompressed size
    pub uncompressed_size: usize,

    /// Compression ratio achieved
    // NOTE(review): the direction of the ratio (compressed/uncompressed or
    // the inverse) is not established in this file; confirm with the writer.
    pub compression_ratio: f64,

    /// Memory usage during creation
    pub peak_memory_usage: usize,
}
119
/// Target platform information.
///
/// Only `os` and `arch` are consulted by
/// `SnapshotHeader::is_platform_compatible`; the remaining fields are
/// recorded but not compared there.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PlatformInfo {
    /// Operating system, as reported by `std::env::consts::OS`.
    pub os: String,

    /// Architecture, as reported by `std::env::consts::ARCH`.
    pub arch: String,

    /// CPU features available
    pub cpu_features: Vec<String>,

    /// Memory page size
    pub page_size: usize,

    /// Cache line size
    pub cache_line_size: usize,

    /// Endianness
    pub endianness: Endianness,
}
141
/// Endianness information.
///
/// `PlatformInfo::current` selects `Little` when compiled with
/// `target_endian = "little"` and `Big` otherwise.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Endianness {
    /// Little-endian target.
    Little,
    /// Big-endian target.
    Big,
}
148
/// Data section information.
///
/// `SnapshotHeader::new` initialises this with no compression, zero sizes,
/// a zero offset, and an alignment of `8`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataSectionInfo {
    /// Compression algorithm used
    pub compression: CompressionInfo,

    /// Uncompressed data size
    pub uncompressed_size: usize,

    /// Compressed data size; the input to
    /// `SnapshotHeader::estimated_load_time`.
    pub compressed_size: usize,

    /// Data section offset in file
    pub data_offset: u64,

    /// Alignment requirements
    pub alignment: usize,
}
167
/// Compression information: the algorithm plus its tuning settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressionInfo {
    /// Compression algorithm
    pub algorithm: CompressionAlgorithm,

    /// Compression level; `0` in headers created by `SnapshotHeader::new`.
    pub level: u32,

    /// Algorithm-specific parameters as free-form key/value strings.
    pub parameters: std::collections::HashMap<String, String>,
}
180
/// Compression algorithms.
///
/// `SnapshotHeader::estimated_load_time` weights the size-dependent part of
/// its estimate by 1x for `None`, 2x for `Lz4`, and 4x for `Zstd`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum CompressionAlgorithm {
    /// No compression; the default chosen by `SnapshotHeader::new`.
    None,
    /// LZ4 compression.
    // NOTE(review): the exact meaning of `fast` is defined by the
    // compressor, not in this file — confirm before relying on it.
    Lz4 { fast: bool },
    /// Zstandard compression with an optional dictionary given as raw bytes.
    Zstd { dictionary: Option<Vec<u8>> },
}
188
/// Checksum information stored in the header.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChecksumInfo {
    /// Checksum algorithm
    pub algorithm: ChecksumAlgorithm,

    /// Checksum size in bytes; `SnapshotFormat::with_checksum` sets this to
    /// the length of the computed digest.
    pub size: usize,

    /// Checksum offset in file; `SnapshotFormat::with_checksum` leaves this
    /// at `0`, to be set during serialization.
    pub offset: u64,
}
201
/// Checksum algorithms supported by `SnapshotFormat::with_checksum` and
/// `SnapshotFormat::validate_checksum`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ChecksumAlgorithm {
    /// SHA-256, computed with the `sha2` crate.
    Sha256,
    /// BLAKE3, computed with the `blake3` crate.
    Blake3,
    /// CRC-32, computed with `crc32fast` and stored as 4 little-endian bytes.
    Crc32,
}
209
210impl SnapshotHeader {
211    /// Create a new snapshot header
212    pub fn new(metadata: SnapshotMetadata) -> Self {
213        Self {
214            magic: *SNAPSHOT_MAGIC,
215            version: SNAPSHOT_VERSION,
216            metadata,
217            data_info: DataSectionInfo {
218                compression: CompressionInfo {
219                    algorithm: CompressionAlgorithm::None,
220                    level: 0,
221                    parameters: std::collections::HashMap::new(),
222                },
223                uncompressed_size: 0,
224                compressed_size: 0,
225                data_offset: 0,
226                alignment: 8,
227            },
228            checksum_info: None,
229            header_size: 0, // Will be calculated during serialization
230        }
231    }
232
233    /// Validate header magic and version
234    pub fn validate(&self) -> crate::SnapshotResult<()> {
235        if self.magic != *SNAPSHOT_MAGIC {
236            return Err(crate::SnapshotError::Corrupted {
237                reason: "Invalid magic number".to_string(),
238            });
239        }
240
241        if self.version > SNAPSHOT_VERSION {
242            return Err(crate::SnapshotError::VersionMismatch {
243                expected: SNAPSHOT_VERSION.to_string(),
244                found: self.version.to_string(),
245            });
246        }
247
248        Ok(())
249    }
250
251    /// Check if snapshot is compatible with current platform
252    pub fn is_platform_compatible(&self) -> bool {
253        let current_os = std::env::consts::OS;
254        let current_arch = std::env::consts::ARCH;
255
256        self.metadata.target_platform.os == current_os
257            && self.metadata.target_platform.arch == current_arch
258    }
259
260    /// Get expected loading performance characteristics
261    pub fn estimated_load_time(&self) -> Duration {
262        // Estimate based on data size and compression
263        let base_time = Duration::from_millis(10); // Base overhead
264        let data_time = Duration::from_nanos(
265            (self.data_info.compressed_size as u64 * 10) / 1024, // ~10ns per KB
266        );
267
268        match self.data_info.compression.algorithm {
269            CompressionAlgorithm::None => base_time + data_time,
270            CompressionAlgorithm::Lz4 { .. } => base_time + data_time * 2,
271            CompressionAlgorithm::Zstd { .. } => base_time + data_time * 4,
272        }
273    }
274}
275
276impl SnapshotMetadata {
277    /// Create metadata for current environment
278    pub fn current() -> Self {
279        Self {
280            created_at: SystemTime::now(),
281            runmat_version: env!("CARGO_PKG_VERSION").to_string(),
282            tool_version: env!("CARGO_PKG_VERSION").to_string(),
283            build_config: BuildConfig::current(),
284            performance_metrics: PerformanceMetrics::default(),
285            feature_flags: Self::detect_feature_flags(),
286            target_platform: PlatformInfo::current(),
287        }
288    }
289
290    /// Detect active feature flags
291    #[allow(clippy::vec_init_then_push)] // Conditional compilation makes vec![] problematic
292    fn detect_feature_flags() -> Vec<String> {
293        let mut flags = Vec::new();
294
295        #[cfg(feature = "compression")]
296        flags.push("compression".to_string());
297
298        #[cfg(feature = "validation")]
299        flags.push("validation".to_string());
300
301        #[cfg(feature = "blas-lapack")]
302        flags.push("blas-lapack".to_string());
303
304        flags
305    }
306
307    /// Check compatibility with current environment
308    pub fn is_compatible(&self) -> bool {
309        // Check major version compatibility
310        let current_version = env!("CARGO_PKG_VERSION");
311        let current_major = current_version.split('.').next().unwrap_or("0");
312        let snapshot_major = self.runmat_version.split('.').next().unwrap_or("0");
313
314        current_major == snapshot_major
315    }
316
317    /// Get human-readable age of snapshot
318    pub fn age(&self) -> Duration {
319        SystemTime::now()
320            .duration_since(self.created_at)
321            .unwrap_or(Duration::ZERO)
322    }
323}
324
325impl BuildConfig {
326    /// Detect current build configuration
327    pub fn current() -> Self {
328        Self {
329            optimization_level: if cfg!(debug_assertions) {
330                "debug".to_string()
331            } else {
332                "release".to_string()
333            },
334            debug_info: cfg!(debug_assertions),
335            compiler: format!(
336                "rustc {}",
337                option_env!("RUSTC_VERSION").unwrap_or("unknown")
338            ),
339            compile_flags: Vec::new(), // Would need to be passed from build system
340            enabled_features: Vec::new(), // Would need feature detection
341        }
342    }
343}
344
345impl Default for PerformanceMetrics {
346    fn default() -> Self {
347        Self {
348            creation_time: Duration::ZERO,
349            builtin_count: 0,
350            hir_cache_entries: 0,
351            bytecode_cache_entries: 0,
352            uncompressed_size: 0,
353            compression_ratio: 1.0,
354            peak_memory_usage: 0,
355        }
356    }
357}
358
359impl PlatformInfo {
360    /// Detect current platform information
361    pub fn current() -> Self {
362        Self {
363            os: std::env::consts::OS.to_string(),
364            arch: std::env::consts::ARCH.to_string(),
365            cpu_features: Self::detect_cpu_features(),
366            page_size: Self::detect_page_size(),
367            cache_line_size: Self::detect_cache_line_size(),
368            endianness: if cfg!(target_endian = "little") {
369                Endianness::Little
370            } else {
371                Endianness::Big
372            },
373        }
374    }
375
376    /// Detect available CPU features
377    fn detect_cpu_features() -> Vec<String> {
378        let mut features = Vec::new();
379
380        #[cfg(target_arch = "x86_64")]
381        {
382            if std::arch::is_x86_feature_detected!("sse4.2") {
383                features.push("sse4.2".to_string());
384            }
385            if std::arch::is_x86_feature_detected!("avx") {
386                features.push("avx".to_string());
387            }
388            if std::arch::is_x86_feature_detected!("avx2") {
389                features.push("avx2".to_string());
390            }
391            if std::arch::is_x86_feature_detected!("fma") {
392                features.push("fma".to_string());
393            }
394        }
395
396        #[cfg(target_arch = "aarch64")]
397        {
398            if std::arch::is_aarch64_feature_detected!("neon") {
399                features.push("neon".to_string());
400            }
401        }
402
403        features
404    }
405
406    /// Detect memory page size
407    fn detect_page_size() -> usize {
408        // Default to common page sizes
409        #[cfg(unix)]
410        {
411            unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize }
412        }
413        #[cfg(not(unix))]
414        {
415            4096 // Common default
416        }
417    }
418
419    /// Detect CPU cache line size
420    fn detect_cache_line_size() -> usize {
421        // Use common default, could be detected more precisely
422        64
423    }
424}
425
426impl SnapshotFormat {
427    /// Create a new snapshot format
428    pub fn new(header: SnapshotHeader, data: Vec<u8>) -> Self {
429        Self {
430            header,
431            data,
432            checksum: None,
433        }
434    }
435
436    /// Calculate and set checksum
437    pub fn with_checksum(mut self, algorithm: ChecksumAlgorithm) -> crate::SnapshotResult<Self> {
438        #[cfg(feature = "validation")]
439        {
440            use sha2::{Digest, Sha256};
441
442            let checksum = match algorithm {
443                ChecksumAlgorithm::Sha256 => {
444                    let mut hasher = Sha256::new();
445                    hasher.update(&self.data);
446                    hasher.finalize().to_vec()
447                }
448                ChecksumAlgorithm::Blake3 => blake3::hash(&self.data).as_bytes().to_vec(),
449                ChecksumAlgorithm::Crc32 => {
450                    let crc = crc32fast::hash(&self.data);
451                    crc.to_le_bytes().to_vec()
452                }
453            };
454
455            self.checksum = Some(checksum.clone());
456            self.header.checksum_info = Some(ChecksumInfo {
457                algorithm,
458                size: checksum.len(),
459                offset: 0, // Will be set during serialization
460            });
461        }
462        #[cfg(not(feature = "validation"))]
463        {
464            return Err(crate::SnapshotError::Configuration {
465                message: "Validation feature not enabled".to_string(),
466            });
467        }
468
469        Ok(self)
470    }
471
472    /// Validate checksum
473    pub fn validate_checksum(&self) -> crate::SnapshotResult<bool> {
474        #[cfg(feature = "validation")]
475        {
476            if let (Some(checksum_info), Some(stored_checksum)) =
477                (&self.header.checksum_info, &self.checksum)
478            {
479                use sha2::{Digest, Sha256};
480
481                let calculated_checksum = match checksum_info.algorithm {
482                    ChecksumAlgorithm::Sha256 => {
483                        let mut hasher = Sha256::new();
484                        hasher.update(&self.data);
485                        hasher.finalize().to_vec()
486                    }
487                    ChecksumAlgorithm::Blake3 => blake3::hash(&self.data).as_bytes().to_vec(),
488                    ChecksumAlgorithm::Crc32 => {
489                        let crc = crc32fast::hash(&self.data);
490                        crc.to_le_bytes().to_vec()
491                    }
492                };
493
494                Ok(calculated_checksum == *stored_checksum)
495            } else {
496                Ok(true) // No checksum to validate
497            }
498        }
499        #[cfg(not(feature = "validation"))]
500        {
501            Ok(true) // Skip validation if feature disabled
502        }
503    }
504
505    /// Get total file size
506    pub fn total_size(&self) -> usize {
507        let header_size = bincode::serialized_size(&self.header).unwrap_or(0) as usize;
508        let data_size = self.data.len();
509        let checksum_size = self.checksum.as_ref().map_or(0, |c| c.len());
510
511        header_size + data_size + checksum_size
512    }
513}
514
#[cfg(test)]
mod tests {
    use super::*;

    /// Header built from metadata describing the current environment.
    fn current_header() -> SnapshotHeader {
        SnapshotHeader::new(SnapshotMetadata::current())
    }

    #[test]
    fn test_snapshot_header_validation() {
        let header = current_header();

        assert!(header.validate().is_ok());
        assert_eq!(header.magic, *SNAPSHOT_MAGIC);
        assert_eq!(header.version, SNAPSHOT_VERSION);
    }

    #[test]
    fn test_platform_compatibility() {
        // A header created on this machine must match this machine.
        assert!(current_header().is_platform_compatible());
    }

    #[test]
    fn test_metadata_compatibility() {
        assert!(SnapshotMetadata::current().is_compatible());
    }

    #[test]
    fn test_platform_info() {
        let PlatformInfo {
            os,
            arch,
            page_size,
            cache_line_size,
            ..
        } = PlatformInfo::current();

        assert!(!os.is_empty());
        assert!(!arch.is_empty());
        assert!(page_size > 0);
        assert!(cache_line_size > 0);
    }

    #[test]
    fn test_build_config() {
        let BuildConfig {
            optimization_level,
            compiler,
            ..
        } = BuildConfig::current();

        assert!(!optimization_level.is_empty());
        assert!(!compiler.is_empty());
    }

    #[test]
    fn test_snapshot_format_creation() {
        let snapshot = SnapshotFormat::new(current_header(), vec![1, 2, 3, 4, 5]);

        assert_eq!(snapshot.data.len(), 5);
        assert!(snapshot.checksum.is_none());
    }

    #[cfg(feature = "validation")]
    #[test]
    fn test_checksum_generation() {
        let snapshot = SnapshotFormat::new(current_header(), vec![1, 2, 3, 4, 5])
            .with_checksum(ChecksumAlgorithm::Sha256)
            .unwrap();

        assert!(snapshot.checksum.is_some());
        assert!(snapshot.header.checksum_info.is_some());
        assert!(snapshot.validate_checksum().unwrap());
    }

    #[test]
    fn test_estimated_load_time() {
        let mut header = current_header();
        header.data_info.compressed_size = 1024 * 1024; // 1MB

        assert!(header.estimated_load_time() > Duration::ZERO);
    }
}