Skip to main content

hexz_core/format/
header.rs

1//! Snapshot file header and related enums.
2
3use hexz_common::constants::DEFAULT_BLOCK_SIZE;
4use hexz_common::crypto::KeyDerivationParams;
5use serde::{Deserialize, Serialize};
6
7use super::magic::{FORMAT_VERSION, MAGIC_BYTES};
8
9/// On-disk snapshot file header containing format metadata.
10///
11/// This structure is serialized at the beginning of every `.hxz` file and
12/// describes the format version, compression settings, encryption parameters,
13/// and locations of key data structures within the file.
14///
15/// # Binary Layout
16///
17/// The header occupies exactly 4096 bytes (HEADER_SIZE) at file offset 0 with
18/// the following logical structure:
19/// - Magic bytes (4): File signature "HEXZ"
20/// - Version (4): Format version number
21/// - Block size (4): Logical block size in bytes
22/// - Index offset (8): File offset to the master index structure
23/// - Parent path (variable): Optional path for thin snapshots
24/// - Dictionary offset/length: Optional compression dictionary location
25/// - Metadata offset/length: Optional user metadata location
26/// - Signature offset/length: Optional cryptographic signature location
27/// - Encryption parameters: Optional key derivation settings
28/// - Compression type: Algorithm used (LZ4 or Zstd)
29/// - Feature flags: Capabilities enabled in this snapshot
30///
31/// # Versioning
32///
33/// The version field enables forward compatibility. Readers check this field
34/// and reject files with incompatible versions. The current format version is
35/// defined in the `magic` module.
36///
37/// # Thin Provisioning
38///
39/// When `parent_paths` is set, this snapshot is a thin snapshot that references
40/// blocks from the parent. Blocks marked with [`BLOCK_OFFSET_PARENT`] are
41/// read from the parent snapshot instead of the current file.
42///
43/// [`BLOCK_OFFSET_PARENT`]: hexz_common::constants::BLOCK_OFFSET_PARENT
44#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
45pub struct Header {
46    pub magic: [u8; 4],
47    pub version: u32,
48    pub block_size: u32,
49    pub index_offset: u64,
50
51    pub parent_paths: Vec<String>,
52
53    pub dictionary_offset: Option<u64>,
54    pub dictionary_length: Option<u32>,
55    pub metadata_offset: Option<u64>,
56    pub metadata_length: Option<u32>,
57    pub signature_offset: Option<u64>,
58    pub signature_length: Option<u32>,
59    pub encryption: Option<KeyDerivationParams>,
60    pub compression: CompressionType,
61    pub features: FeatureFlags,
62}
63
64/// Compression algorithm used for block data.
65///
66/// This enum specifies which compression algorithm was used to compress
67/// the data blocks stored in the snapshot file. The compressor must be
68/// configured appropriately when reading the file.
69///
70/// # Supported Algorithms
71///
72/// - **LZ4**: Fast compression with lower ratios, ideal for latency-sensitive workloads
73/// - **Zstd**: Balanced compression with optional dictionary training for higher ratios
74///
75/// # Performance Characteristics
76///
77/// - LZ4: ~500 MB/s compression, ~2000 MB/s decompression (single-threaded)
78/// - Zstd (level 3): ~200 MB/s compression, ~600 MB/s decompression (single-threaded)
79///
80/// The actual performance depends on data characteristics, CPU capabilities, and
81/// whether dictionary compression is enabled for Zstd.
82#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
83pub enum CompressionType {
84    /// LZ4 compression algorithm (fast, lower ratio)
85    Lz4,
86    /// Zstandard compression algorithm (balanced, supports dictionaries)
87    Zstd,
88}
89
90/// Feature flags indicating capabilities enabled in this snapshot.
91///
92/// These boolean flags describe which optional features are present in the
93/// snapshot file. Readers must check these flags to determine how to
94/// interpret the file contents.
95///
96/// # Fields
97///
98/// - `has_disk`: Snapshot contains disk state (primary stream present in index)
99/// - `has_memory`: Snapshot contains memory state (secondary stream present in index)
100/// - `variable_blocks`: Content-defined chunking (CDC) was used instead of fixed-size blocks
101///
102/// # Usage
103///
104/// When both `has_disk` and `has_memory` are true, the snapshot is a full VM
105/// snapshot that can be used for live migration or checkpoint/restore. When
106/// only `has_disk` is true, it's a disk-only snapshot suitable for boot or backup.
107///
108/// The `variable_blocks` flag indicates that block sizes vary (CDC mode) and
109/// readers must use the `logical_len` field from each [`BlockInfo`] rather than
110/// assuming a fixed block size.
111///
112/// [`BlockInfo`]: super::index::BlockInfo
113#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
114pub struct FeatureFlags {
115    /// Snapshot contains disk state
116    pub has_disk: bool,
117    /// Snapshot contains memory state
118    pub has_memory: bool,
119    /// Content-defined chunking (CDC) was used for variable-sized blocks
120    pub variable_blocks: bool,
121}
122
123impl Header {
124    /// Read and deserialize a header from a [`std::io::Read`] source.
125    pub fn read_from<R: std::io::Read>(reader: &mut R) -> hexz_common::Result<Self> {
126        let mut header_bytes = [0u8; super::magic::HEADER_SIZE];
127        reader.read_exact(&mut header_bytes)?;
128        let header: Header = bincode::deserialize(&header_bytes)?;
129        Ok(header)
130    }
131
132    /// Read a header from a [`StorageBackend`](crate::store::StorageBackend) at offset 0.
133    pub fn read_from_backend(
134        backend: &dyn crate::store::StorageBackend,
135    ) -> hexz_common::Result<Self> {
136        let header_bytes = backend.read_exact(0, super::magic::HEADER_SIZE)?;
137        let header: Header = bincode::deserialize(&header_bytes)?;
138        Ok(header)
139    }
140
141    /// Load the compression dictionary from the backend, if present.
142    pub fn load_dictionary(
143        &self,
144        backend: &dyn crate::store::StorageBackend,
145    ) -> hexz_common::Result<Option<Vec<u8>>> {
146        if let (Some(offset), Some(length)) = (self.dictionary_offset, self.dictionary_length) {
147            Ok(Some(backend.read_exact(offset, length as usize)?.to_vec()))
148        } else {
149            Ok(None)
150        }
151    }
152}
153
154impl Default for Header {
155    fn default() -> Self {
156        Self {
157            magic: *MAGIC_BYTES,
158            version: FORMAT_VERSION,
159            block_size: DEFAULT_BLOCK_SIZE,
160            index_offset: 0,
161            parent_paths: Vec::new(),
162            dictionary_offset: None,
163            dictionary_length: None,
164            metadata_offset: None,
165            metadata_length: None,
166            signature_offset: None,
167            signature_length: None,
168            encryption: None,
169            compression: CompressionType::Lz4,
170            features: FeatureFlags::default(),
171        }
172    }
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Positional shorthand for building a [`FeatureFlags`] value.
    fn flags(has_disk: bool, has_memory: bool, variable_blocks: bool) -> FeatureFlags {
        FeatureFlags {
            has_disk,
            has_memory,
            variable_blocks,
        }
    }

    /// Flattens flags into a `(has_disk, has_memory, variable_blocks)` tuple.
    fn as_tuple(f: FeatureFlags) -> (bool, bool, bool) {
        (f.has_disk, f.has_memory, f.variable_blocks)
    }

    /// Encodes `header` with bincode and decodes the bytes back into a header.
    fn round_trip(header: &Header) -> Header {
        let encoded = bincode::serialize(header).unwrap();
        bincode::deserialize(&encoded).unwrap()
    }

    // ---------------------------------------------------------------------
    // Header: defaults and individual fields
    // ---------------------------------------------------------------------

    #[test]
    fn test_hexz_header_default() {
        // Destructuring without `..` keeps this test exhaustive over the fields.
        let Header {
            magic,
            version,
            block_size,
            index_offset,
            parent_paths,
            dictionary_offset,
            dictionary_length,
            metadata_offset,
            metadata_length,
            signature_offset,
            signature_length,
            encryption,
            compression,
            features,
        } = Header::default();

        assert_eq!(magic, *MAGIC_BYTES);
        assert_eq!(version, FORMAT_VERSION);
        assert_eq!(block_size, DEFAULT_BLOCK_SIZE);
        assert_eq!(index_offset, 0);
        assert!(parent_paths.is_empty());
        assert_eq!(dictionary_offset, None);
        assert_eq!(dictionary_length, None);
        assert_eq!(metadata_offset, None);
        assert_eq!(metadata_length, None);
        assert_eq!(signature_offset, None);
        assert_eq!(signature_length, None);
        assert!(encryption.is_none());
        assert_eq!(compression, CompressionType::Lz4);
        assert_eq!(as_tuple(features), (false, false, false));
    }

    #[test]
    fn test_hexz_header_with_disk() {
        let header = Header {
            index_offset: 1_048_576,
            features: FeatureFlags {
                has_disk: true,
                ..FeatureFlags::default()
            },
            ..Header::default()
        };

        assert!(header.features.has_disk);
        assert_eq!(header.index_offset, 1_048_576);
    }

    #[test]
    fn test_hexz_header_with_memory() {
        let header = Header {
            features: FeatureFlags {
                has_memory: true,
                ..FeatureFlags::default()
            },
            ..Header::default()
        };

        assert!(header.features.has_memory);
    }

    #[test]
    fn test_hexz_header_with_both_streams() {
        let header = Header {
            features: FeatureFlags {
                has_disk: true,
                has_memory: true,
                ..FeatureFlags::default()
            },
            ..Header::default()
        };

        assert!(header.features.has_disk);
        assert!(header.features.has_memory);
    }

    #[test]
    fn test_hexz_header_with_parent_path() {
        let header = Header {
            parent_paths: vec![String::from("/path/to/parent.hxz")],
            ..Header::default()
        };

        assert_eq!(header.parent_paths, ["/path/to/parent.hxz"]);
    }

    #[test]
    fn test_hexz_header_with_dictionary() {
        let header = Header {
            dictionary_offset: Some(4096),
            dictionary_length: Some(16_384),
            ..Header::default()
        };

        assert_eq!(header.dictionary_offset, Some(4096));
        assert_eq!(header.dictionary_length, Some(16_384));
    }

    #[test]
    fn test_hexz_header_with_metadata() {
        let header = Header {
            metadata_offset: Some(20_480),
            metadata_length: Some(1024),
            ..Header::default()
        };

        assert_eq!(header.metadata_offset, Some(20_480));
        assert_eq!(header.metadata_length, Some(1024));
    }

    #[test]
    fn test_hexz_header_with_signature() {
        let header = Header {
            signature_offset: Some(24_576),
            signature_length: Some(256),
            ..Header::default()
        };

        assert_eq!(header.signature_offset, Some(24_576));
        assert_eq!(header.signature_length, Some(256));
    }

    #[test]
    fn test_hexz_header_with_encryption() {
        let header = Header {
            encryption: Some(KeyDerivationParams {
                salt: [0x42; 16],
                iterations: 100_000,
            }),
            ..Header::default()
        };

        let params = header
            .encryption
            .expect("encryption parameters were set above");
        assert_eq!(params.salt, [0x42; 16]);
        assert_eq!(params.iterations, 100_000);
    }

    #[test]
    fn test_hexz_header_zstd_compression() {
        let header = Header {
            compression: CompressionType::Zstd,
            ..Header::default()
        };

        assert_eq!(header.compression, CompressionType::Zstd);
    }

    #[test]
    fn test_hexz_header_variable_blocks() {
        let header = Header {
            features: FeatureFlags {
                variable_blocks: true,
                ..FeatureFlags::default()
            },
            ..Header::default()
        };

        assert!(header.features.variable_blocks);
    }

    #[test]
    fn test_hexz_header_custom_block_size() {
        let header = Header {
            block_size: 128 * 1024, // 128 KB
            ..Header::default()
        };

        assert_eq!(header.block_size, 131_072);
    }

    #[test]
    fn test_hexz_header_large_index_offset() {
        let header = Header {
            index_offset: 1u64 << 40, // 1 TB
            ..Header::default()
        };

        assert_eq!(header.index_offset, 1_099_511_627_776);
    }

    // ---------------------------------------------------------------------
    // Header: serialization and derived traits
    // ---------------------------------------------------------------------

    #[test]
    fn test_hexz_header_serialization() {
        let original = Header::default();

        assert_eq!(round_trip(&original), original);
    }

    #[test]
    fn test_hexz_header_serialization_with_all_fields() {
        let original = Header {
            magic: *MAGIC_BYTES,
            version: FORMAT_VERSION,
            block_size: 65_536,
            index_offset: 1_048_576,
            parent_paths: vec![String::from("/parent.hxz")],
            dictionary_offset: Some(4096),
            dictionary_length: Some(16_384),
            metadata_offset: Some(20_480),
            metadata_length: Some(1024),
            signature_offset: Some(24_576),
            signature_length: Some(256),
            encryption: Some(KeyDerivationParams {
                salt: [0x42; 16],
                iterations: 100_000,
            }),
            compression: CompressionType::Zstd,
            features: flags(true, true, true),
        };

        let decoded = round_trip(&original);

        assert_eq!(decoded, original);
        assert_eq!(decoded.block_size, 65_536);
        assert_eq!(decoded.parent_paths, ["/parent.hxz"]);
        assert_eq!(as_tuple(decoded.features), (true, true, true));
    }

    #[test]
    fn test_hexz_header_equality() {
        assert_eq!(Header::default(), Header::default());
    }

    #[test]
    fn test_hexz_header_inequality() {
        let small_blocks = Header {
            block_size: 4096,
            ..Header::default()
        };
        let large_blocks = Header {
            block_size: 65_536,
            ..Header::default()
        };

        assert_ne!(small_blocks, large_blocks);
    }

    #[test]
    fn test_hexz_header_clone() {
        let original = Header::default();
        let copy = original.clone();

        assert_eq!(original, copy);
    }

    #[test]
    fn test_hexz_header_debug_format() {
        let rendered = format!("{:?}", Header::default());

        for needle in ["Header", "magic", "version"] {
            assert!(rendered.contains(needle));
        }
    }

    // ---------------------------------------------------------------------
    // CompressionType
    // ---------------------------------------------------------------------

    #[test]
    fn test_compression_type_lz4() {
        let compression = CompressionType::Lz4;
        assert!(matches!(compression, CompressionType::Lz4));
    }

    #[test]
    fn test_compression_type_zstd() {
        let compression = CompressionType::Zstd;
        assert!(matches!(compression, CompressionType::Zstd));
    }

    #[test]
    fn test_compression_type_equality() {
        assert_eq!(CompressionType::Lz4, CompressionType::Lz4);
        assert_eq!(CompressionType::Zstd, CompressionType::Zstd);
        assert_ne!(CompressionType::Lz4, CompressionType::Zstd);
    }

    #[test]
    fn test_compression_type_serialization() {
        for original in [CompressionType::Lz4, CompressionType::Zstd] {
            let encoded = bincode::serialize(&original).unwrap();
            let decoded: CompressionType = bincode::deserialize(&encoded).unwrap();
            assert_eq!(decoded, original);
        }
    }

    #[test]
    fn test_compression_type_clone() {
        let first = CompressionType::Zstd;
        // `CompressionType` is `Copy`, so `first` stays usable after this.
        let second = first;

        assert_eq!(first, second);
    }

    // ---------------------------------------------------------------------
    // FeatureFlags
    // ---------------------------------------------------------------------

    #[test]
    fn test_feature_flags_default() {
        assert_eq!(as_tuple(FeatureFlags::default()), (false, false, false));
    }

    #[test]
    fn test_feature_flags_disk_only() {
        assert_eq!(as_tuple(flags(true, false, false)), (true, false, false));
    }

    #[test]
    fn test_feature_flags_memory_only() {
        assert_eq!(as_tuple(flags(false, true, false)), (false, true, false));
    }

    #[test]
    fn test_feature_flags_full_vm_snapshot() {
        assert_eq!(as_tuple(flags(true, true, false)), (true, true, false));
    }

    #[test]
    fn test_feature_flags_with_variable_blocks() {
        assert_eq!(as_tuple(flags(true, false, true)), (true, false, true));
    }

    #[test]
    fn test_feature_flags_all_enabled() {
        assert_eq!(as_tuple(flags(true, true, true)), (true, true, true));
    }

    #[test]
    fn test_feature_flags_equality() {
        assert_eq!(flags(true, false, false), flags(true, false, false));
    }

    #[test]
    fn test_feature_flags_inequality() {
        assert_ne!(flags(true, false, false), flags(false, true, false));
    }

    #[test]
    fn test_feature_flags_serialization() {
        let original = flags(true, true, false);

        let encoded = bincode::serialize(&original).unwrap();
        let decoded: FeatureFlags = bincode::deserialize(&encoded).unwrap();

        assert_eq!(decoded, original);
    }

    #[test]
    fn test_feature_flags_clone() {
        let first = flags(true, true, true);
        // `FeatureFlags` is `Copy`, so `first` stays usable after this.
        let second = first;

        assert_eq!(first, second);
    }
}