Skip to main content

hexz_core/format/
header.rs

1//! Archive file header and related enums.
2
3use hexz_common::constants::DEFAULT_BLOCK_SIZE;
4use hexz_common::crypto::KeyDerivationParams;
5use serde::{Deserialize, Serialize};
6
7use super::magic::{FORMAT_VERSION, MAGIC_BYTES};
8
9/// On-disk archive file header containing format metadata.
10#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
11pub struct Header {
12    /// Magic bytes identifying the file as a Hexz archive.
13    pub magic: [u8; 4],
14    /// Format version number for compatibility checks.
15    pub version: u32,
16    /// Block size in bytes used for data segmentation.
17    pub block_size: u32,
18    /// Byte offset of the master index within the archive.
19    pub index_offset: u64,
20
21    /// Paths to parent archives for thin (incremental) archives.
22    pub parent_paths: Vec<String>,
23
24    /// Byte offset of the compression dictionary, if present.
25    pub dictionary_offset: Option<u64>,
26    /// Length in bytes of the compression dictionary, if present.
27    pub dictionary_length: Option<u32>,
28    /// Byte offset of the metadata section, if present.
29    pub metadata_offset: Option<u64>,
30    /// Length in bytes of the metadata section, if present.
31    pub metadata_length: Option<u32>,
32    /// Byte offset of the digital signature, if present.
33    pub signature_offset: Option<u64>,
34    /// Length in bytes of the digital signature, if present.
35    pub signature_length: Option<u32>,
36    /// Key derivation parameters when the archive is encrypted.
37    pub encryption: Option<KeyDerivationParams>,
38    /// Compression algorithm used for data blocks.
39    pub compression: CompressionType,
40    /// Feature flags indicating enabled capabilities.
41    pub features: FeatureFlags,
42
43    /// Content-defined chunking parameters used for this archive.
44    /// (`fingerprint_bits`, `min_chunk`, `max_chunk`)
45    pub cdc_params: Option<(u32, u32, u32)>,
46}
47
48/// Supported compression algorithms for archive data blocks.
49#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
50pub enum CompressionType {
51    /// LZ4 compression algorithm (fast, lower ratio)
52    Lz4,
53    /// Zstandard compression algorithm (balanced, supports dictionaries)
54    Zstd,
55}
56
57/// Feature flags indicating capabilities enabled in this archive.
58#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
59pub struct FeatureFlags {
60    /// Archive contains a main data stream
61    pub has_main: bool,
62    /// Archive contains an auxiliary data stream
63    pub has_auxiliary: bool,
64    /// Content-defined chunking (CDC) was used for variable-sized blocks
65    pub variable_blocks: bool,
66}
67
68impl Header {
69    /// Read and deserialize a header from a [`std::io::Read`] source.
70    pub fn read_from<R: std::io::Read>(reader: &mut R) -> hexz_common::Result<Self> {
71        let mut header_bytes = [0u8; super::magic::HEADER_SIZE];
72        reader.read_exact(&mut header_bytes)?;
73        let header: Self = bincode::deserialize(&header_bytes)?;
74        if &header.magic != MAGIC_BYTES {
75            return Err(hexz_common::Error::Format("Invalid magic bytes".into()));
76        }
77        Ok(header)
78    }
79
80    /// Read a header from a [`StorageBackend`](crate::store::StorageBackend) at offset 0.
81    pub fn read_from_backend(
82        backend: &dyn crate::store::StorageBackend,
83    ) -> hexz_common::Result<Self> {
84        let header_bytes = backend.read_exact(0, super::magic::HEADER_SIZE)?;
85        let header: Self = bincode::deserialize(&header_bytes)?;
86        if &header.magic != MAGIC_BYTES {
87            return Err(hexz_common::Error::Format("Invalid magic bytes".into()));
88        }
89        Ok(header)
90    }
91
92    /// Load the compression dictionary from the backend, if present.
93    pub fn load_dictionary(
94        &self,
95        backend: &dyn crate::store::StorageBackend,
96    ) -> hexz_common::Result<Option<Vec<u8>>> {
97        if let (Some(offset), Some(length)) = (self.dictionary_offset, self.dictionary_length) {
98            Ok(Some(backend.read_exact(offset, length as usize)?.to_vec()))
99        } else {
100            Ok(None)
101        }
102    }
103}
104
105impl Default for Header {
106    fn default() -> Self {
107        Self {
108            magic: *MAGIC_BYTES,
109            version: FORMAT_VERSION,
110            block_size: DEFAULT_BLOCK_SIZE,
111            index_offset: 0,
112            parent_paths: Vec::new(),
113            dictionary_offset: None,
114            dictionary_length: None,
115            metadata_offset: None,
116            metadata_length: None,
117            signature_offset: None,
118            signature_length: None,
119            encryption: None,
120            compression: CompressionType::Lz4,
121            features: FeatureFlags::default(),
122            cdc_params: None,
123        }
124    }
125}