Skip to main content

hexz_core/format/
header.rs

1//! Snapshot file header and related enums.
2
3use hexz_common::constants::DEFAULT_BLOCK_SIZE;
4use hexz_common::crypto::KeyDerivationParams;
5use serde::{Deserialize, Serialize};
6
7use super::magic::{FORMAT_VERSION, MAGIC_BYTES};
8
9/// On-disk snapshot file header containing format metadata.
10///
11/// This structure is serialized at the beginning of every `.hxz` file and
12/// describes the format version, compression settings, encryption parameters,
13/// and locations of key data structures within the file.
14///
15/// # Binary Layout
16///
17/// The header occupies exactly 4096 bytes (HEADER_SIZE) at file offset 0 with
18/// the following logical structure:
19/// - Magic bytes (4): File signature "HEXZ"
20/// - Version (4): Format version number
21/// - Block size (4): Logical block size in bytes
22/// - Index offset (8): File offset to the master index structure
23/// - Parent path (variable): Optional path for thin snapshots
24/// - Dictionary offset/length: Optional compression dictionary location
25/// - Metadata offset/length: Optional user metadata location
26/// - Signature offset/length: Optional cryptographic signature location
27/// - Encryption parameters: Optional key derivation settings
28/// - Compression type: Algorithm used (LZ4 or Zstd)
29/// - Feature flags: Capabilities enabled in this snapshot
30///
31/// # Versioning
32///
33/// The version field enables forward compatibility. Readers check this field
34/// and reject files with incompatible versions. The current format version is
35/// defined in the `magic` module.
36///
37/// # Thin Provisioning
38///
39/// When `parent_path` is set, this snapshot is a thin snapshot that references
40/// blocks from the parent. Blocks marked with [`BLOCK_OFFSET_PARENT`] are
41/// read from the parent snapshot instead of the current file.
42///
43/// [`BLOCK_OFFSET_PARENT`]: hexz_common::constants::BLOCK_OFFSET_PARENT
44#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
45pub struct Header {
46    pub magic: [u8; 4],
47    pub version: u32,
48    pub block_size: u32,
49    pub index_offset: u64,
50
51    /// Path to the parent snapshot for thin provisioning.
52    /// If None, this is a standalone (thick) snapshot.
53    pub parent_path: Option<String>,
54
55    pub dictionary_offset: Option<u64>,
56    pub dictionary_length: Option<u32>,
57    pub metadata_offset: Option<u64>,
58    pub metadata_length: Option<u32>,
59    pub signature_offset: Option<u64>,
60    pub signature_length: Option<u32>,
61    pub encryption: Option<KeyDerivationParams>,
62    pub compression: CompressionType,
63    pub features: FeatureFlags,
64}
65
66/// Compression algorithm used for block data.
67///
68/// This enum specifies which compression algorithm was used to compress
69/// the data blocks stored in the snapshot file. The compressor must be
70/// configured appropriately when reading the file.
71///
72/// # Supported Algorithms
73///
74/// - **LZ4**: Fast compression with lower ratios, ideal for latency-sensitive workloads
75/// - **Zstd**: Balanced compression with optional dictionary training for higher ratios
76///
77/// # Performance Characteristics
78///
79/// - LZ4: ~500 MB/s compression, ~2000 MB/s decompression (single-threaded)
80/// - Zstd (level 3): ~200 MB/s compression, ~600 MB/s decompression (single-threaded)
81///
82/// The actual performance depends on data characteristics, CPU capabilities, and
83/// whether dictionary compression is enabled for Zstd.
84#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
85pub enum CompressionType {
86    /// LZ4 compression algorithm (fast, lower ratio)
87    Lz4,
88    /// Zstandard compression algorithm (balanced, supports dictionaries)
89    Zstd,
90}
91
92/// Feature flags indicating capabilities enabled in this snapshot.
93///
94/// These boolean flags describe which optional features are present in the
95/// snapshot file. Readers must check these flags to determine how to
96/// interpret the file contents.
97///
98/// # Fields
99///
100/// - `has_disk`: Snapshot contains disk state (disk stream present in index)
101/// - `has_memory`: Snapshot contains memory state (memory stream present in index)
102/// - `variable_blocks`: Content-defined chunking (CDC) was used instead of fixed-size blocks
103///
104/// # Usage
105///
106/// When both `has_disk` and `has_memory` are true, the snapshot is a full VM
107/// snapshot that can be used for live migration or checkpoint/restore. When
108/// only `has_disk` is true, it's a disk-only snapshot suitable for boot or backup.
109///
110/// The `variable_blocks` flag indicates that block sizes vary (CDC mode) and
111/// readers must use the `logical_len` field from each [`BlockInfo`] rather than
112/// assuming a fixed block size.
113///
114/// [`BlockInfo`]: super::index::BlockInfo
115#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
116pub struct FeatureFlags {
117    /// Snapshot contains disk state
118    pub has_disk: bool,
119    /// Snapshot contains memory state
120    pub has_memory: bool,
121    /// Content-defined chunking (CDC) was used for variable-sized blocks
122    pub variable_blocks: bool,
123}
124
125impl Header {
126    /// Read and deserialize a header from a [`std::io::Read`] source.
127    pub fn read_from<R: std::io::Read>(reader: &mut R) -> hexz_common::Result<Self> {
128        let mut header_bytes = [0u8; super::magic::HEADER_SIZE];
129        reader.read_exact(&mut header_bytes)?;
130        let header: Header = bincode::deserialize(&header_bytes)?;
131        Ok(header)
132    }
133
134    /// Read a header from a [`StorageBackend`](crate::store::StorageBackend) at offset 0.
135    pub fn read_from_backend(
136        backend: &dyn crate::store::StorageBackend,
137    ) -> hexz_common::Result<Self> {
138        let header_bytes = backend.read_exact(0, super::magic::HEADER_SIZE)?;
139        let header: Header = bincode::deserialize(&header_bytes)?;
140        Ok(header)
141    }
142
143    /// Load the compression dictionary from the backend, if present.
144    pub fn load_dictionary(
145        &self,
146        backend: &dyn crate::store::StorageBackend,
147    ) -> hexz_common::Result<Option<Vec<u8>>> {
148        if let (Some(offset), Some(length)) = (self.dictionary_offset, self.dictionary_length) {
149            Ok(Some(backend.read_exact(offset, length as usize)?.to_vec()))
150        } else {
151            Ok(None)
152        }
153    }
154}
155
156impl Default for Header {
157    fn default() -> Self {
158        Self {
159            magic: *MAGIC_BYTES,
160            version: FORMAT_VERSION,
161            block_size: DEFAULT_BLOCK_SIZE,
162            index_offset: 0,
163            parent_path: None,
164            dictionary_offset: None,
165            dictionary_length: None,
166            metadata_offset: None,
167            metadata_length: None,
168            signature_offset: None,
169            signature_length: None,
170            encryption: None,
171            compression: CompressionType::Lz4,
172            features: FeatureFlags::default(),
173        }
174    }
175}
176
#[cfg(test)]
mod tests {
    use super::super::magic::HEADER_SIZE;
    use super::*;

    /// Key-derivation parameters shared by the encryption-related tests.
    fn sample_kdf_params() -> KeyDerivationParams {
        KeyDerivationParams {
            salt: [0x42; 16],
            iterations: 100_000,
        }
    }

    // ---------------------------------------------------------------------
    // Header: construction and field access
    // ---------------------------------------------------------------------

    #[test]
    fn test_hexz_header_default() {
        let header = Header::default();

        assert_eq!(header.magic, *MAGIC_BYTES);
        assert_eq!(header.version, FORMAT_VERSION);
        assert_eq!(header.block_size, DEFAULT_BLOCK_SIZE);
        assert_eq!(header.index_offset, 0);
        assert!(header.parent_path.is_none());
        assert!(header.dictionary_offset.is_none());
        assert!(header.dictionary_length.is_none());
        assert!(header.metadata_offset.is_none());
        assert!(header.metadata_length.is_none());
        assert!(header.signature_offset.is_none());
        assert!(header.signature_length.is_none());
        assert!(header.encryption.is_none());
        assert_eq!(header.compression, CompressionType::Lz4);
        assert!(!header.features.has_disk);
        assert!(!header.features.has_memory);
        assert!(!header.features.variable_blocks);
    }

    #[test]
    fn test_hexz_header_with_disk() {
        let header = Header {
            index_offset: 1_048_576,
            features: FeatureFlags {
                has_disk: true,
                ..FeatureFlags::default()
            },
            ..Header::default()
        };

        assert!(header.features.has_disk);
        assert_eq!(header.index_offset, 1_048_576);
    }

    #[test]
    fn test_hexz_header_with_memory() {
        let header = Header {
            features: FeatureFlags {
                has_memory: true,
                ..FeatureFlags::default()
            },
            ..Header::default()
        };

        assert!(header.features.has_memory);
    }

    #[test]
    fn test_hexz_header_with_both_streams() {
        let header = Header {
            features: FeatureFlags {
                has_disk: true,
                has_memory: true,
                ..FeatureFlags::default()
            },
            ..Header::default()
        };

        assert!(header.features.has_disk);
        assert!(header.features.has_memory);
    }

    #[test]
    fn test_hexz_header_with_parent_path() {
        let header = Header {
            parent_path: Some("/path/to/parent.hxz".to_string()),
            ..Header::default()
        };

        assert_eq!(header.parent_path.as_deref(), Some("/path/to/parent.hxz"));
    }

    #[test]
    fn test_hexz_header_with_dictionary() {
        let header = Header {
            dictionary_offset: Some(4096),
            dictionary_length: Some(16384),
            ..Header::default()
        };

        assert_eq!(header.dictionary_offset, Some(4096));
        assert_eq!(header.dictionary_length, Some(16384));
    }

    #[test]
    fn test_hexz_header_with_metadata() {
        let header = Header {
            metadata_offset: Some(20480),
            metadata_length: Some(1024),
            ..Header::default()
        };

        assert_eq!(header.metadata_offset, Some(20480));
        assert_eq!(header.metadata_length, Some(1024));
    }

    #[test]
    fn test_hexz_header_with_signature() {
        let header = Header {
            signature_offset: Some(24576),
            signature_length: Some(256),
            ..Header::default()
        };

        assert_eq!(header.signature_offset, Some(24576));
        assert_eq!(header.signature_length, Some(256));
    }

    #[test]
    fn test_hexz_header_with_encryption() {
        let header = Header {
            encryption: Some(sample_kdf_params()),
            ..Header::default()
        };

        assert!(header.encryption.is_some());
        let params = header
            .encryption
            .expect("encryption parameters were set above");
        assert_eq!(params.salt, [0x42; 16]);
        assert_eq!(params.iterations, 100_000);
    }

    #[test]
    fn test_hexz_header_zstd_compression() {
        let header = Header {
            compression: CompressionType::Zstd,
            ..Header::default()
        };

        assert_eq!(header.compression, CompressionType::Zstd);
    }

    #[test]
    fn test_hexz_header_variable_blocks() {
        let header = Header {
            features: FeatureFlags {
                variable_blocks: true,
                ..FeatureFlags::default()
            },
            ..Header::default()
        };

        assert!(header.features.variable_blocks);
    }

    #[test]
    fn test_hexz_header_custom_block_size() {
        let header = Header {
            block_size: 131_072, // 128 KB
            ..Header::default()
        };

        assert_eq!(header.block_size, 131_072);
    }

    #[test]
    fn test_hexz_header_large_index_offset() {
        let header = Header {
            index_offset: 1_099_511_627_776, // 1 TB
            ..Header::default()
        };

        assert_eq!(header.index_offset, 1_099_511_627_776);
    }

    // ---------------------------------------------------------------------
    // Header: serialization and reading
    // ---------------------------------------------------------------------

    #[test]
    fn test_hexz_header_serialization() {
        let header = Header::default();

        let bytes = bincode::serialize(&header).unwrap();
        let deserialized: Header = bincode::deserialize(&bytes).unwrap();

        assert_eq!(deserialized, header);
    }

    #[test]
    fn test_hexz_header_serialization_with_all_fields() {
        let header = Header {
            magic: *MAGIC_BYTES,
            version: FORMAT_VERSION,
            block_size: 65536,
            index_offset: 1_048_576,
            parent_path: Some("/parent.hxz".to_string()),
            dictionary_offset: Some(4096),
            dictionary_length: Some(16384),
            metadata_offset: Some(20480),
            metadata_length: Some(1024),
            signature_offset: Some(24576),
            signature_length: Some(256),
            encryption: Some(sample_kdf_params()),
            compression: CompressionType::Zstd,
            features: FeatureFlags {
                has_disk: true,
                has_memory: true,
                variable_blocks: true,
            },
        };

        let bytes = bincode::serialize(&header).unwrap();
        let deserialized: Header = bincode::deserialize(&bytes).unwrap();

        assert_eq!(deserialized, header);
        assert_eq!(deserialized.block_size, 65536);
        assert_eq!(deserialized.parent_path.as_deref(), Some("/parent.hxz"));
        assert!(deserialized.features.has_disk);
        assert!(deserialized.features.has_memory);
        assert!(deserialized.features.variable_blocks);
    }

    /// `read_from` always consumes a full `HEADER_SIZE` block, so the encoded
    /// header must decode correctly when followed by zero padding.
    #[test]
    fn test_hexz_header_read_from_padded_buffer() {
        let header = Header {
            index_offset: 1_048_576,
            compression: CompressionType::Zstd,
            ..Header::default()
        };

        let encoded = bincode::serialize(&header).unwrap();
        let mut block = vec![0u8; HEADER_SIZE];
        block[..encoded.len()].copy_from_slice(&encoded);

        let decoded =
            Header::read_from(&mut block.as_slice()).expect("padded header block should decode");

        assert_eq!(decoded, header);
    }

    /// Fewer than `HEADER_SIZE` bytes must be reported as an error rather
    /// than decoded from a partially filled buffer.
    #[test]
    fn test_hexz_header_read_from_truncated_input() {
        let truncated = [0u8; 16];

        assert!(Header::read_from(&mut &truncated[..]).is_err());
    }

    // ---------------------------------------------------------------------
    // Header: derived traits
    // ---------------------------------------------------------------------

    #[test]
    fn test_hexz_header_equality() {
        let header1 = Header::default();
        let header2 = Header::default();

        assert_eq!(header1, header2);
    }

    #[test]
    fn test_hexz_header_inequality() {
        let header1 = Header {
            block_size: 4096,
            ..Header::default()
        };
        let header2 = Header {
            block_size: 65536,
            ..Header::default()
        };

        assert_ne!(header1, header2);
    }

    #[test]
    fn test_hexz_header_clone() {
        let header1 = Header::default();
        let header2 = header1.clone();

        assert_eq!(header1, header2);
    }

    #[test]
    fn test_hexz_header_debug_format() {
        let header = Header::default();
        let debug_str = format!("{:?}", header);

        assert!(debug_str.contains("Header"));
        assert!(debug_str.contains("magic"));
        assert!(debug_str.contains("version"));
    }

    // ---------------------------------------------------------------------
    // CompressionType
    // ---------------------------------------------------------------------

    #[test]
    fn test_compression_type_lz4() {
        let compression = CompressionType::Lz4;
        assert_eq!(compression, CompressionType::Lz4);
    }

    #[test]
    fn test_compression_type_zstd() {
        let compression = CompressionType::Zstd;
        assert_eq!(compression, CompressionType::Zstd);
    }

    #[test]
    fn test_compression_type_equality() {
        assert_eq!(CompressionType::Lz4, CompressionType::Lz4);
        assert_eq!(CompressionType::Zstd, CompressionType::Zstd);
        assert_ne!(CompressionType::Lz4, CompressionType::Zstd);
    }

    #[test]
    fn test_compression_type_serialization() {
        let lz4 = CompressionType::Lz4;
        let bytes = bincode::serialize(&lz4).unwrap();
        let deserialized: CompressionType = bincode::deserialize(&bytes).unwrap();
        assert_eq!(deserialized, CompressionType::Lz4);

        let zstd = CompressionType::Zstd;
        let bytes = bincode::serialize(&zstd).unwrap();
        let deserialized: CompressionType = bincode::deserialize(&bytes).unwrap();
        assert_eq!(deserialized, CompressionType::Zstd);
    }

    #[test]
    fn test_compression_type_clone() {
        // `CompressionType` is `Copy`, so the original stays usable.
        let comp1 = CompressionType::Zstd;
        let comp2 = comp1;

        assert_eq!(comp1, comp2);
    }

    // ---------------------------------------------------------------------
    // FeatureFlags
    // ---------------------------------------------------------------------

    #[test]
    fn test_feature_flags_default() {
        let flags = FeatureFlags::default();

        assert!(!flags.has_disk);
        assert!(!flags.has_memory);
        assert!(!flags.variable_blocks);
    }

    #[test]
    fn test_feature_flags_disk_only() {
        let flags = FeatureFlags {
            has_disk: true,
            has_memory: false,
            variable_blocks: false,
        };

        assert!(flags.has_disk);
        assert!(!flags.has_memory);
        assert!(!flags.variable_blocks);
    }

    #[test]
    fn test_feature_flags_memory_only() {
        let flags = FeatureFlags {
            has_disk: false,
            has_memory: true,
            variable_blocks: false,
        };

        assert!(!flags.has_disk);
        assert!(flags.has_memory);
        assert!(!flags.variable_blocks);
    }

    #[test]
    fn test_feature_flags_full_vm_snapshot() {
        let flags = FeatureFlags {
            has_disk: true,
            has_memory: true,
            variable_blocks: false,
        };

        assert!(flags.has_disk);
        assert!(flags.has_memory);
        assert!(!flags.variable_blocks);
    }

    #[test]
    fn test_feature_flags_with_variable_blocks() {
        let flags = FeatureFlags {
            has_disk: true,
            has_memory: false,
            variable_blocks: true,
        };

        assert!(flags.has_disk);
        assert!(!flags.has_memory);
        assert!(flags.variable_blocks);
    }

    #[test]
    fn test_feature_flags_all_enabled() {
        let flags = FeatureFlags {
            has_disk: true,
            has_memory: true,
            variable_blocks: true,
        };

        assert!(flags.has_disk);
        assert!(flags.has_memory);
        assert!(flags.variable_blocks);
    }

    #[test]
    fn test_feature_flags_equality() {
        let flags1 = FeatureFlags {
            has_disk: true,
            has_memory: false,
            variable_blocks: false,
        };

        let flags2 = FeatureFlags {
            has_disk: true,
            has_memory: false,
            variable_blocks: false,
        };

        assert_eq!(flags1, flags2);
    }

    #[test]
    fn test_feature_flags_inequality() {
        let flags1 = FeatureFlags {
            has_disk: true,
            has_memory: false,
            variable_blocks: false,
        };

        let flags2 = FeatureFlags {
            has_disk: false,
            has_memory: true,
            variable_blocks: false,
        };

        assert_ne!(flags1, flags2);
    }

    #[test]
    fn test_feature_flags_serialization() {
        let flags = FeatureFlags {
            has_disk: true,
            has_memory: true,
            variable_blocks: false,
        };

        let bytes = bincode::serialize(&flags).unwrap();
        let deserialized: FeatureFlags = bincode::deserialize(&bytes).unwrap();

        assert_eq!(deserialized, flags);
    }

    #[test]
    fn test_feature_flags_clone() {
        // `FeatureFlags` is `Copy`, so the original stays usable.
        let flags1 = FeatureFlags {
            has_disk: true,
            has_memory: true,
            variable_blocks: true,
        };
        let flags2 = flags1;

        assert_eq!(flags1, flags2);
    }
}