Skip to main content

crush_parallel/
format.rs

1//! CRSH binary file format serialization/deserialization.
2//!
3//! # File Layout
4//!
5//! ```text
6//! Offset 0:        FileHeader (64 bytes)
7//! Offset 64:       Block 0 — BlockHeader (16 bytes) + payload (compressed_size bytes)
8//! ...              Block N-1
9//! Offset X:        IndexHeader (8 bytes)
10//! Offset X+8:      BlockIndexEntry[0..N] (20 bytes each)
11//! Offset X+8+20N:  FileFooter (24 bytes)  ← last 24 bytes of file
12//! ```
13
14use crc32fast::Hasher;
15use crush_core::error::{CrushError, Result};
16
/// Magic bytes that identify a CRSH file: the ASCII string `"CRSH"`.
pub const CRSH_MAGIC: [u8; 4] = *b"CRSH";

/// Format version written by this implementation. Readers reject files that
/// carry any other version.
pub const FORMAT_VERSION: u32 = 1;
22
23/// Current engine semantic version (set at build time).
24pub const ENGINE_VERSION_STR: &str = env!("CARGO_PKG_VERSION");
25
26// ---------------------------------------------------------------------------
27// EngineVersion
28// ---------------------------------------------------------------------------
29
30/// Packed 8-byte semantic version of the producing engine.
31#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32pub struct EngineVersion {
33    pub major: u16,
34    pub minor: u16,
35    pub patch: u16,
36    pub pre: u8,
37    pub build: u8,
38}
39
40impl EngineVersion {
41    /// Parse the crate's `CARGO_PKG_VERSION` into an [`EngineVersion`].
42    #[must_use]
43    pub fn current() -> Self {
44        let v = ENGINE_VERSION_STR;
45        let mut parts = v.split('.');
46        let major = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
47        let minor = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
48        let patch = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
49        Self {
50            major,
51            minor,
52            patch,
53            pre: 0,
54            build: 0,
55        }
56    }
57
58    /// Serialize to 8 bytes (little-endian).
59    #[must_use]
60    pub fn to_bytes(self) -> [u8; 8] {
61        let mut b = [0u8; 8];
62        b[0..2].copy_from_slice(&self.major.to_le_bytes());
63        b[2..4].copy_from_slice(&self.minor.to_le_bytes());
64        b[4..6].copy_from_slice(&self.patch.to_le_bytes());
65        b[6] = self.pre;
66        b[7] = self.build;
67        b
68    }
69
70    /// Deserialize from 8 bytes.
71    #[must_use]
72    pub fn from_bytes(b: &[u8; 8]) -> Self {
73        Self {
74            major: u16::from_le_bytes([b[0], b[1]]),
75            minor: u16::from_le_bytes([b[2], b[3]]),
76            patch: u16::from_le_bytes([b[4], b[5]]),
77            pre: b[6],
78            build: b[7],
79        }
80    }
81
82    /// Format as a human-readable version string.
83    #[must_use]
84    pub fn to_string_repr(self) -> String {
85        format!("{}.{}.{}", self.major, self.minor, self.patch)
86    }
87}
88
89// ---------------------------------------------------------------------------
90// FileFlags
91// ---------------------------------------------------------------------------
92
/// Bitfield flags stored in the file header.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FileFlags(pub u8);

impl FileFlags {
    /// Bit 0: per-block CRC32 checksums are present and validated.
    pub const CHECKSUMS_ENABLED: u8 = 1 << 0;
    /// Bit 1: file was produced from a streaming input (sizes may be `u64::MAX`).
    pub const STREAMING: u8 = 1 << 1;

    /// Whether the [`Self::CHECKSUMS_ENABLED`] bit is set.
    #[must_use]
    pub fn checksums_enabled(self) -> bool {
        self.has(Self::CHECKSUMS_ENABLED)
    }

    /// Whether the [`Self::STREAMING`] bit is set.
    #[must_use]
    pub fn streaming(self) -> bool {
        self.has(Self::STREAMING)
    }

    /// Return a copy with the [`Self::CHECKSUMS_ENABLED`] bit set.
    #[must_use]
    pub fn with_checksums(self) -> Self {
        self.union(Self::CHECKSUMS_ENABLED)
    }

    /// Return a copy with the [`Self::STREAMING`] bit set.
    #[must_use]
    pub fn with_streaming(self) -> Self {
        self.union(Self::STREAMING)
    }

    /// True when any bit of `mask` is set in the raw flag byte.
    fn has(self, mask: u8) -> bool {
        (self.0 & mask) != 0
    }

    /// Copy of `self` with every bit of `mask` additionally set.
    fn union(self, mask: u8) -> Self {
        Self(self.0 | mask)
    }
}
123
124// ---------------------------------------------------------------------------
125// BlockFlags
126// ---------------------------------------------------------------------------
127
/// Per-block bitfield flags stored in the block header.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct BlockFlags(pub u8);

impl BlockFlags {
    /// Bit 0: block is stored raw (uncompressed).
    pub const STORED: u8 = 1 << 0;

    /// Whether the [`Self::STORED`] bit is set.
    #[must_use]
    pub fn stored(self) -> bool {
        let masked = self.0 & Self::STORED;
        masked != 0
    }

    /// Return a copy with the [`Self::STORED`] bit set; other bits are kept.
    #[must_use]
    pub fn with_stored(self) -> Self {
        let Self(bits) = self;
        Self(bits | Self::STORED)
    }
}
147
148// ---------------------------------------------------------------------------
149// FileHeader (64 bytes)
150// ---------------------------------------------------------------------------
151
152/// Fixed 64-byte header at the start of every CRSH file.
153#[derive(Debug, Clone, PartialEq, Eq)]
154pub struct FileHeader {
155    pub magic: [u8; 4],
156    pub format_version: u32,
157    pub engine_version: EngineVersion,
158    pub block_size: u32,
159    pub compression_level: u8,
160    pub flags: FileFlags,
161    /// `u64::MAX` = unknown (streaming input).
162    pub uncompressed_size: u64,
163    /// `u64::MAX` = unknown (streaming input).
164    pub block_count: u64,
165}
166
167impl FileHeader {
168    pub const SIZE: usize = 64;
169
170    /// Build a header from engine configuration values.
171    #[must_use]
172    pub fn new(
173        block_size: u32,
174        compression_level: u8,
175        flags: FileFlags,
176        uncompressed_size: u64,
177        block_count: u64,
178    ) -> Self {
179        Self {
180            magic: CRSH_MAGIC,
181            format_version: FORMAT_VERSION,
182            engine_version: EngineVersion::current(),
183            block_size,
184            compression_level,
185            flags,
186            uncompressed_size,
187            block_count,
188        }
189    }
190
191    /// Serialize to exactly 64 bytes (little-endian).
192    #[must_use]
193    pub fn to_bytes(&self) -> [u8; Self::SIZE] {
194        let mut b = [0u8; Self::SIZE];
195        b[0..4].copy_from_slice(&self.magic);
196        b[4..8].copy_from_slice(&self.format_version.to_le_bytes());
197        b[8..16].copy_from_slice(&self.engine_version.to_bytes());
198        b[16..20].copy_from_slice(&self.block_size.to_le_bytes());
199        b[20] = self.compression_level;
200        b[21] = self.flags.0;
201        // b[22..24] reserved — zero
202        b[24..32].copy_from_slice(&self.uncompressed_size.to_le_bytes());
203        b[32..40].copy_from_slice(&self.block_count.to_le_bytes());
204        // b[40..64] reserved — zero
205        b
206    }
207
208    /// Deserialize and validate a 64-byte slice.
209    ///
210    /// # Errors
211    ///
212    /// - [`CrushError::InvalidFormat`] if magic bytes are wrong.
213    /// - [`CrushError::VersionMismatch`] if `format_version` differs.
214    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Result<Self> {
215        let magic = [b[0], b[1], b[2], b[3]];
216        if magic != CRSH_MAGIC {
217            return Err(CrushError::InvalidFormat(format!(
218                "expected magic {CRSH_MAGIC:?}, got {magic:?}"
219            )));
220        }
221        let format_version = u32::from_le_bytes([b[4], b[5], b[6], b[7]]);
222        if format_version != FORMAT_VERSION {
223            let ev = EngineVersion::from_bytes(b[8..16].try_into().map_err(|_| {
224                CrushError::InvalidFormat("header too short for engine version".to_owned())
225            })?);
226            return Err(CrushError::VersionMismatch {
227                file_version: format!("format v{format_version} (engine {})", ev.to_string_repr()),
228                current_version: format!("format v{FORMAT_VERSION} (engine {ENGINE_VERSION_STR})"),
229            });
230        }
231        let engine_version = EngineVersion::from_bytes(
232            b[8..16]
233                .try_into()
234                .map_err(|_| CrushError::InvalidFormat("header too short".to_owned()))?,
235        );
236        let block_size = u32::from_le_bytes([b[16], b[17], b[18], b[19]]);
237        let compression_level = b[20];
238        let flags = FileFlags(b[21]);
239        let uncompressed_size = u64::from_le_bytes(b[24..32].try_into().map_err(|_| {
240            CrushError::InvalidFormat("header truncated at uncompressed_size".to_owned())
241        })?);
242        let block_count = u64::from_le_bytes(b[32..40].try_into().map_err(|_| {
243            CrushError::InvalidFormat("header truncated at block_count".to_owned())
244        })?);
245        Ok(Self {
246            magic,
247            format_version,
248            engine_version,
249            block_size,
250            compression_level,
251            flags,
252            uncompressed_size,
253            block_count,
254        })
255    }
256}
257
258// ---------------------------------------------------------------------------
259// BlockHeader (16 bytes)
260// ---------------------------------------------------------------------------
261
262/// Fixed 16-byte header preceding each compressed block's payload.
263#[derive(Debug, Clone, Copy, PartialEq, Eq)]
264pub struct BlockHeader {
265    pub compressed_size: u32,
266    pub uncompressed_size: u32,
267    /// CRC32 of the **uncompressed** block data. `0` if checksums disabled.
268    pub checksum: u32,
269    pub flags: BlockFlags,
270}
271
272impl BlockHeader {
273    pub const SIZE: usize = 16;
274
275    #[must_use]
276    pub fn to_bytes(self) -> [u8; Self::SIZE] {
277        let mut b = [0u8; Self::SIZE];
278        b[0..4].copy_from_slice(&self.compressed_size.to_le_bytes());
279        b[4..8].copy_from_slice(&self.uncompressed_size.to_le_bytes());
280        b[8..12].copy_from_slice(&self.checksum.to_le_bytes());
281        b[12] = self.flags.0;
282        // b[13..16] reserved — zero
283        b
284    }
285
286    #[must_use]
287    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Self {
288        Self {
289            compressed_size: u32::from_le_bytes([b[0], b[1], b[2], b[3]]),
290            uncompressed_size: u32::from_le_bytes([b[4], b[5], b[6], b[7]]),
291            checksum: u32::from_le_bytes([b[8], b[9], b[10], b[11]]),
292            flags: BlockFlags(b[12]),
293        }
294    }
295}
296
297// ---------------------------------------------------------------------------
298// BlockIndexEntry (20 bytes)
299// ---------------------------------------------------------------------------
300
/// One entry in the trailing block index.
///
/// Wire layout (little-endian): `block_offset` in bytes `0..8`,
/// `compressed_size` in `8..12`, `uncompressed_size` in `12..16`,
/// `checksum` in `16..20`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BlockIndexEntry {
    /// Absolute byte offset of the [`BlockHeader`] from start of file.
    pub block_offset: u64,
    pub compressed_size: u32,
    pub uncompressed_size: u32,
    pub checksum: u32,
}

impl BlockIndexEntry {
    /// Serialized size of one entry in bytes.
    pub const SIZE: usize = 20;

    /// Serialize to exactly 20 bytes (little-endian).
    #[must_use]
    pub fn to_bytes(self) -> [u8; Self::SIZE] {
        let mut out = [0u8; Self::SIZE];
        let (offset_bytes, words) = out.split_at_mut(8);
        offset_bytes.copy_from_slice(&self.block_offset.to_le_bytes());
        // The remaining 12 bytes are three consecutive u32 fields.
        let fields = [self.compressed_size, self.uncompressed_size, self.checksum];
        for (slot, value) in words.chunks_exact_mut(4).zip(fields) {
            slot.copy_from_slice(&value.to_le_bytes());
        }
        out
    }

    /// Deserialize from 20 bytes (inverse of [`BlockIndexEntry::to_bytes`]).
    #[must_use]
    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Self {
        let [o0, o1, o2, o3, o4, o5, o6, o7, ..] = *b;
        let word_at = |at: usize| u32::from_le_bytes([b[at], b[at + 1], b[at + 2], b[at + 3]]);
        Self {
            block_offset: u64::from_le_bytes([o0, o1, o2, o3, o4, o5, o6, o7]),
            compressed_size: word_at(8),
            uncompressed_size: word_at(12),
            checksum: word_at(16),
        }
    }
}
334
335// ---------------------------------------------------------------------------
336// IndexHeader (8 bytes)
337// ---------------------------------------------------------------------------
338
/// 8-byte header immediately before the block index entries.
///
/// Wire layout (little-endian): `entry_count` in bytes `0..4`,
/// `index_flags` in bytes `4..8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndexHeader {
    pub entry_count: u32,
    /// Reserved — must be 0.
    pub index_flags: u32,
}

impl IndexHeader {
    /// Serialized size of the header in bytes.
    pub const SIZE: usize = 8;

    /// Serialize to exactly 8 bytes (little-endian).
    #[must_use]
    pub fn to_bytes(self) -> [u8; Self::SIZE] {
        let [e0, e1, e2, e3] = self.entry_count.to_le_bytes();
        let [f0, f1, f2, f3] = self.index_flags.to_le_bytes();
        [e0, e1, e2, e3, f0, f1, f2, f3]
    }

    /// Deserialize from 8 bytes (inverse of [`IndexHeader::to_bytes`]).
    #[must_use]
    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Self {
        let [e0, e1, e2, e3, f0, f1, f2, f3] = *b;
        Self {
            entry_count: u32::from_le_bytes([e0, e1, e2, e3]),
            index_flags: u32::from_le_bytes([f0, f1, f2, f3]),
        }
    }
}
366
367// ---------------------------------------------------------------------------
368// FileFooter (24 bytes)
369// ---------------------------------------------------------------------------
370
371/// Fixed 24-byte record at the very end of every CRSH file.
372///
373/// Reading algorithm:
374/// 1. Seek to `file_size - 24`.
375/// 2. Read and deserialize this struct.
376/// 3. Validate magic, `format_version`, and `footer_checksum`.
377/// 4. Seek to `index_offset` to read the index region.
378#[derive(Debug, Clone, Copy, PartialEq, Eq)]
379pub struct FileFooter {
380    /// Absolute byte offset of the [`IndexHeader`] from start of file.
381    pub index_offset: u64,
382    /// Byte length of the index region: `8 + 20 * block_count`.
383    pub index_size: u32,
384    /// CRC32 of bytes `[0..12]` of this footer (protects `index_offset` + `index_size`).
385    pub footer_checksum: u32,
386    /// Redundant copy of `FileHeader::format_version`.
387    pub format_version: u32,
388    pub magic: [u8; 4],
389}
390
391impl FileFooter {
392    pub const SIZE: usize = 24;
393
394    /// Build and compute the footer checksum.
395    #[must_use]
396    pub fn new(index_offset: u64, index_size: u32) -> Self {
397        let mut f = Self {
398            index_offset,
399            index_size,
400            footer_checksum: 0,
401            format_version: FORMAT_VERSION,
402            magic: CRSH_MAGIC,
403        };
404        f.footer_checksum = f.compute_checksum();
405        f
406    }
407
408    fn compute_checksum(self) -> u32 {
409        let b = self.to_bytes_unchecked();
410        let mut h = Hasher::new();
411        h.update(&b[0..12]);
412        h.finalize()
413    }
414
415    fn to_bytes_unchecked(self) -> [u8; Self::SIZE] {
416        let mut b = [0u8; Self::SIZE];
417        b[0..8].copy_from_slice(&self.index_offset.to_le_bytes());
418        b[8..12].copy_from_slice(&self.index_size.to_le_bytes());
419        b[12..16].copy_from_slice(&self.footer_checksum.to_le_bytes());
420        b[16..20].copy_from_slice(&self.format_version.to_le_bytes());
421        b[20..24].copy_from_slice(&self.magic);
422        b
423    }
424
425    #[must_use]
426    pub fn to_bytes(self) -> [u8; Self::SIZE] {
427        self.to_bytes_unchecked()
428    }
429
430    /// Deserialize and validate a 24-byte footer.
431    ///
432    /// # Errors
433    ///
434    /// - [`CrushError::InvalidFormat`] if magic is wrong or checksum fails.
435    /// - [`CrushError::VersionMismatch`] if `format_version` differs.
436    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Result<Self> {
437        let magic = [b[20], b[21], b[22], b[23]];
438        if magic != CRSH_MAGIC {
439            return Err(CrushError::InvalidFormat(format!(
440                "footer magic {magic:?} does not match CRSH"
441            )));
442        }
443        let format_version = u32::from_le_bytes([b[16], b[17], b[18], b[19]]);
444        if format_version != FORMAT_VERSION {
445            return Err(CrushError::VersionMismatch {
446                file_version: format!("format v{format_version}"),
447                current_version: format!("format v{FORMAT_VERSION} (engine {ENGINE_VERSION_STR})"),
448            });
449        }
450        let footer = Self {
451            index_offset: u64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]),
452            index_size: u32::from_le_bytes([b[8], b[9], b[10], b[11]]),
453            footer_checksum: u32::from_le_bytes([b[12], b[13], b[14], b[15]]),
454            format_version,
455            magic,
456        };
457        let expected = {
458            let mut h = Hasher::new();
459            h.update(&b[0..12]);
460            h.finalize()
461        };
462        if footer.footer_checksum != expected {
463            return Err(CrushError::IndexCorrupted(format!(
464                "footer checksum mismatch: expected {expected:#010x}, got {:#010x}",
465                footer.footer_checksum
466            )));
467        }
468        Ok(footer)
469    }
470}
471
472// ---------------------------------------------------------------------------
473// Tests
474// ---------------------------------------------------------------------------
475
#[cfg(test)]
// Panicking via `expect`/`unwrap` is the intended failure mode inside tests.
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// An [`EngineVersion`] serializes little-endian and survives a round trip.
    #[test]
    fn test_engine_version_roundtrip() {
        let v = EngineVersion {
            major: 1,
            minor: 2,
            patch: 300,
            pre: 4,
            build: 5,
        };
        let bytes = v.to_bytes();
        assert_eq!(bytes, [1, 0, 2, 0, 0x2C, 0x01, 4, 5]);
        assert_eq!(EngineVersion::from_bytes(&bytes), v);
        assert_eq!(v.to_string_repr(), "1.2.300");
    }

    /// Flag builders set exactly their own bit and the getters read it back.
    #[test]
    fn test_flags_bits() {
        let ff = FileFlags::default().with_checksums();
        assert!(ff.checksums_enabled());
        assert!(!ff.streaming());
        assert_eq!(ff.with_streaming().0, 0b0000_0011);

        let bf = BlockFlags::default();
        assert!(!bf.stored());
        assert!(bf.with_stored().stored());
    }

    /// A header built with `new` survives serialize → deserialize unchanged.
    #[test]
    fn test_file_header_roundtrip() {
        let h = FileHeader::new(
            1_048_576,
            6,
            FileFlags::default().with_checksums(),
            10_000_000,
            10,
        );
        let bytes = h.to_bytes();
        assert_eq!(bytes.len(), FileHeader::SIZE);
        let h2 = FileHeader::from_bytes(&bytes).expect("roundtrip");
        assert_eq!(h, h2);
    }

    /// Both reserved ranges of the header are written as zero.
    #[test]
    fn test_file_header_reserved_bytes_are_zero() {
        let h = FileHeader::new(u32::MAX, u8::MAX, FileFlags(u8::MAX), u64::MAX, u64::MAX);
        let bytes = h.to_bytes();
        assert!(bytes[22..24].iter().all(|&x| x == 0));
        assert!(bytes[40..64].iter().all(|&x| x == 0));
    }

    /// A wrong magic is reported specifically as `InvalidFormat`.
    #[test]
    fn test_file_header_magic_rejection() {
        let mut bytes = [0u8; FileHeader::SIZE];
        bytes[0] = 0xFF; // wrong magic
        let result = FileHeader::from_bytes(&bytes);
        assert!(matches!(result, Err(CrushError::InvalidFormat(_))));
    }

    /// A valid magic with an unsupported version yields `VersionMismatch`.
    #[test]
    fn test_file_header_version_mismatch() {
        let h = FileHeader::new(1_048_576, 6, FileFlags::default(), 0, 0);
        let mut bytes = h.to_bytes();
        // Overwrite format_version with an unsupported value
        bytes[4..8].copy_from_slice(&9999u32.to_le_bytes());
        let result = FileHeader::from_bytes(&bytes);
        assert!(matches!(result, Err(CrushError::VersionMismatch { .. })));
    }

    #[test]
    fn test_block_header_roundtrip() {
        let bh = BlockHeader {
            compressed_size: 512,
            uncompressed_size: 1024,
            checksum: 0xDEAD_BEEF,
            flags: BlockFlags::default(),
        };
        let bytes = bh.to_bytes();
        assert_eq!(bytes.len(), BlockHeader::SIZE);
        let bh2 = BlockHeader::from_bytes(&bytes);
        assert_eq!(bh, bh2);
    }

    #[test]
    fn test_block_index_entry_roundtrip() {
        let e = BlockIndexEntry {
            block_offset: 12345,
            compressed_size: 888,
            uncompressed_size: 1024,
            checksum: 0xCAFE_BABE,
        };
        let bytes = e.to_bytes();
        assert_eq!(bytes.len(), BlockIndexEntry::SIZE);
        let e2 = BlockIndexEntry::from_bytes(&bytes);
        assert_eq!(e, e2);
    }

    #[test]
    fn test_index_header_roundtrip() {
        let ih = IndexHeader {
            entry_count: 42,
            index_flags: 0,
        };
        let bytes = ih.to_bytes();
        assert_eq!(bytes.len(), IndexHeader::SIZE);
        let ih2 = IndexHeader::from_bytes(&bytes);
        assert_eq!(ih, ih2);
    }

    #[test]
    fn test_file_footer_roundtrip() {
        let ff = FileFooter::new(99999, 8 + 20 * 10);
        let bytes = ff.to_bytes();
        assert_eq!(bytes.len(), FileFooter::SIZE);
        let ff2 = FileFooter::from_bytes(&bytes).expect("roundtrip");
        assert_eq!(ff, ff2);
    }

    /// A wrong footer magic is reported specifically as `InvalidFormat`.
    #[test]
    fn test_file_footer_magic_rejection() {
        let mut bytes = [0u8; FileFooter::SIZE];
        bytes[20..24].copy_from_slice(&[0xFF, 0xFF, 0xFF, 0xFF]);
        let result = FileFooter::from_bytes(&bytes);
        assert!(matches!(result, Err(CrushError::InvalidFormat(_))));
    }

    /// An unsupported footer version is rejected before the checksum is checked.
    #[test]
    fn test_file_footer_version_mismatch() {
        let mut bytes = FileFooter::new(1000, 208).to_bytes();
        bytes[16..20].copy_from_slice(&9999u32.to_le_bytes());
        let result = FileFooter::from_bytes(&bytes);
        assert!(matches!(result, Err(CrushError::VersionMismatch { .. })));
    }

    /// A footer whose stored checksum no longer matches is `IndexCorrupted`.
    #[test]
    fn test_file_footer_truncated_detection() {
        // An otherwise valid footer with one checksum byte flipped
        let ff = FileFooter::new(1000, 208);
        let mut bytes = ff.to_bytes();
        bytes[12] ^= 0xFF; // corrupt checksum
        let result = FileFooter::from_bytes(&bytes);
        assert!(matches!(result, Err(CrushError::IndexCorrupted(_))));
    }
}