Skip to main content

crush_gpu/
format.rs

1//! GPU tile format: header, tile, index, and footer structures
2//!
3//! # File Layout
4//!
5//! ```text
6//! Offset 0:           GpuFileHeader (64 bytes)
7//! Offset 64:          Tile 0 — TileHeader (32 bytes) + payload (padded to 128-byte boundary)
8//! ...                 Tile N-1
9//! Offset X:           TileIndexHeader (8 bytes)
10//! Offset X+8:         TileIndexEntry[0..N] (24 bytes each)
11//! Offset X+8+24N:     GpuFileFooter (24 bytes)  ← last 24 bytes of file
12//! ```
13
14use crc32fast::Hasher;
15use crush_core::error::{CrushError, Result};
16
/// Magic bytes identifying the format: the four ASCII characters `CGPU`.
pub const CGPU_MAGIC: [u8; 4] = *b"CGPU";

/// Format version written by this code. Readers reject any other value.
/// Version 2 uses `GDeflate` encoding (version 1 used LZ77).
pub const FORMAT_VERSION: u32 = 2;

/// Default tile size in bytes: 64 KiB (65536).
pub const DEFAULT_TILE_SIZE: u32 = 64 * 1024;

/// Default number of sub-streams per tile: 32 (matches GPU warp width).
pub const DEFAULT_SUB_STREAM_COUNT: u8 = 32;

/// Byte boundary that tile payloads are padded to
/// (128 bytes for GPU memory coalescing).
pub const TILE_ALIGNMENT: usize = 128;
32
/// Semantic version string of the engine, taken from the `CARGO_PKG_VERSION`
/// environment variable by `env!` when this crate is compiled.
pub const ENGINE_VERSION_STR: &str = env!("CARGO_PKG_VERSION");
35
36// ---------------------------------------------------------------------------
37// EngineVersion (8 bytes)
38// ---------------------------------------------------------------------------
39
40/// Packed 8-byte semantic version of the producing engine.
41#[derive(Debug, Clone, Copy, PartialEq, Eq)]
42pub struct EngineVersion {
43    pub major: u16,
44    pub minor: u16,
45    pub patch: u16,
46    pub pre: u8,
47    pub build: u8,
48}
49
50impl EngineVersion {
51    /// Parse the crate's `CARGO_PKG_VERSION` into an [`EngineVersion`].
52    #[must_use]
53    pub fn current() -> Self {
54        let v = ENGINE_VERSION_STR;
55        let mut parts = v.split('.');
56        let major = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
57        let minor = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
58        let patch = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
59        Self {
60            major,
61            minor,
62            patch,
63            pre: 0,
64            build: 0,
65        }
66    }
67
68    /// Serialize to 8 bytes (little-endian).
69    #[must_use]
70    pub fn to_bytes(self) -> [u8; 8] {
71        let mut b = [0u8; 8];
72        b[0..2].copy_from_slice(&self.major.to_le_bytes());
73        b[2..4].copy_from_slice(&self.minor.to_le_bytes());
74        b[4..6].copy_from_slice(&self.patch.to_le_bytes());
75        b[6] = self.pre;
76        b[7] = self.build;
77        b
78    }
79
80    /// Deserialize from 8 bytes.
81    #[must_use]
82    pub fn from_bytes(b: &[u8; 8]) -> Self {
83        Self {
84            major: u16::from_le_bytes([b[0], b[1]]),
85            minor: u16::from_le_bytes([b[2], b[3]]),
86            patch: u16::from_le_bytes([b[4], b[5]]),
87            pre: b[6],
88            build: b[7],
89        }
90    }
91}
92
93// ---------------------------------------------------------------------------
94// GpuFileFlags
95// ---------------------------------------------------------------------------
96
/// Bitfield flags stored in the GPU file header (one byte on disk).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct GpuFileFlags(pub u8);

impl GpuFileFlags {
    /// Bit 0: per-tile CRC32 checksums are present.
    pub const CHECKSUMS_ENABLED: u8 = 1 << 0;
    /// Bit 1: vectorized string matching was applied.
    pub const VECTORIZE_USED: u8 = 1 << 1;
    /// Bit 2: entropy was validated before compression.
    pub const ENTROPY_CHECKED: u8 = 1 << 2;

    /// True when any bit of `mask` is set.
    fn is_set(self, mask: u8) -> bool {
        (self.0 & mask) != 0
    }

    /// Copy of `self` with the bits of `mask` switched on.
    fn set(self, mask: u8) -> Self {
        Self(self.0 | mask)
    }

    /// Whether the [`Self::CHECKSUMS_ENABLED`] bit is set.
    #[must_use]
    pub fn checksums_enabled(self) -> bool {
        self.is_set(Self::CHECKSUMS_ENABLED)
    }

    /// Whether the [`Self::VECTORIZE_USED`] bit is set.
    #[must_use]
    pub fn vectorize_used(self) -> bool {
        self.is_set(Self::VECTORIZE_USED)
    }

    /// Whether the [`Self::ENTROPY_CHECKED`] bit is set.
    #[must_use]
    pub fn entropy_checked(self) -> bool {
        self.is_set(Self::ENTROPY_CHECKED)
    }

    /// Return the flags with [`Self::CHECKSUMS_ENABLED`] set.
    #[must_use]
    pub fn with_checksums(self) -> Self {
        self.set(Self::CHECKSUMS_ENABLED)
    }

    /// Return the flags with [`Self::VECTORIZE_USED`] set.
    #[must_use]
    pub fn with_vectorize(self) -> Self {
        self.set(Self::VECTORIZE_USED)
    }

    /// Return the flags with [`Self::ENTROPY_CHECKED`] set.
    #[must_use]
    pub fn with_entropy_checked(self) -> Self {
        self.set(Self::ENTROPY_CHECKED)
    }
}
142
143// ---------------------------------------------------------------------------
144// GpuFileHeader (64 bytes)
145// ---------------------------------------------------------------------------
146
147/// Fixed 64-byte header at the start of every GPU-compressed archive.
148#[derive(Debug, Clone, PartialEq, Eq)]
149pub struct GpuFileHeader {
150    pub magic: [u8; 4],
151    pub format_version: u32,
152    pub engine_version: EngineVersion,
153    pub tile_size: u32,
154    pub sub_stream_count: u8,
155    pub flags: GpuFileFlags,
156    pub uncompressed_size: u64,
157    pub tile_count: u64,
158}
159
160impl GpuFileHeader {
161    pub const SIZE: usize = 64;
162
163    /// Build a header with default tile size (64KB) and sub-stream count (32).
164    #[must_use]
165    pub fn new(tile_count: u64, uncompressed_size: u64) -> Self {
166        Self {
167            magic: CGPU_MAGIC,
168            format_version: FORMAT_VERSION,
169            engine_version: EngineVersion::current(),
170            tile_size: DEFAULT_TILE_SIZE,
171            sub_stream_count: DEFAULT_SUB_STREAM_COUNT,
172            flags: GpuFileFlags::default()
173                .with_checksums()
174                .with_entropy_checked(),
175            uncompressed_size,
176            tile_count,
177        }
178    }
179
180    /// Serialize to exactly 64 bytes (little-endian).
181    #[must_use]
182    pub fn to_bytes(&self) -> [u8; Self::SIZE] {
183        let mut b = [0u8; Self::SIZE];
184        b[0..4].copy_from_slice(&self.magic);
185        b[4..8].copy_from_slice(&self.format_version.to_le_bytes());
186        b[8..16].copy_from_slice(&self.engine_version.to_bytes());
187        b[16..20].copy_from_slice(&self.tile_size.to_le_bytes());
188        b[20] = self.sub_stream_count;
189        b[21] = self.flags.0;
190        // b[22..24] reserved
191        b[24..32].copy_from_slice(&self.uncompressed_size.to_le_bytes());
192        b[32..40].copy_from_slice(&self.tile_count.to_le_bytes());
193        // b[40..64] reserved
194        b
195    }
196
197    /// Deserialize and validate a 64-byte slice.
198    ///
199    /// # Errors
200    ///
201    /// - [`CrushError::InvalidFormat`] if magic bytes are wrong.
202    /// - [`CrushError::VersionMismatch`] if `format_version` differs.
203    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Result<Self> {
204        let magic = [b[0], b[1], b[2], b[3]];
205        if magic != CGPU_MAGIC {
206            return Err(CrushError::InvalidFormat(format!(
207                "expected CGPU magic {CGPU_MAGIC:?}, got {magic:?}"
208            )));
209        }
210        let format_version = u32::from_le_bytes([b[4], b[5], b[6], b[7]]);
211        if format_version != FORMAT_VERSION {
212            let ev = EngineVersion::from_bytes(
213                b[8..16]
214                    .try_into()
215                    .map_err(|_| CrushError::InvalidFormat("header too short".to_owned()))?,
216            );
217            return Err(CrushError::VersionMismatch {
218                file_version: format!(
219                    "format v{format_version} (engine {}.{}.{})",
220                    ev.major, ev.minor, ev.patch
221                ),
222                current_version: format!("format v{FORMAT_VERSION} (engine {ENGINE_VERSION_STR})"),
223            });
224        }
225        let engine_version = EngineVersion::from_bytes(
226            b[8..16]
227                .try_into()
228                .map_err(|_| CrushError::InvalidFormat("header too short".to_owned()))?,
229        );
230        Ok(Self {
231            magic,
232            format_version,
233            engine_version,
234            tile_size: u32::from_le_bytes([b[16], b[17], b[18], b[19]]),
235            sub_stream_count: b[20],
236            flags: GpuFileFlags(b[21]),
237            uncompressed_size: u64::from_le_bytes(
238                b[24..32]
239                    .try_into()
240                    .map_err(|_| CrushError::InvalidFormat("header truncated".to_owned()))?,
241            ),
242            tile_count: u64::from_le_bytes(
243                b[32..40]
244                    .try_into()
245                    .map_err(|_| CrushError::InvalidFormat("header truncated".to_owned()))?,
246            ),
247        })
248    }
249}
250
251// ---------------------------------------------------------------------------
252// TileFlags
253// ---------------------------------------------------------------------------
254
/// Per-tile bitfield flags stored in the tile header (one byte on disk).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct TileFlags(pub u8);

impl TileFlags {
    /// Bit 0: tile is stored raw (uncompressed).
    pub const STORED: u8 = 1 << 0;
    /// Bit 1: this is the final tile (may be <64KB).
    pub const LAST_TILE: u8 = 1 << 1;

    /// Whether the [`Self::STORED`] bit is set.
    #[must_use]
    pub fn stored(self) -> bool {
        (self.0 & Self::STORED) != 0
    }

    /// Whether the [`Self::LAST_TILE`] bit is set.
    #[must_use]
    pub fn last_tile(self) -> bool {
        (self.0 & Self::LAST_TILE) != 0
    }

    /// Return the flags with [`Self::STORED`] set.
    #[must_use]
    pub fn with_stored(self) -> Self {
        Self(self.0 | Self::STORED)
    }

    /// Return the flags with [`Self::LAST_TILE`] set.
    #[must_use]
    pub fn with_last_tile(self) -> Self {
        Self(self.0 | Self::LAST_TILE)
    }
}
287
288// ---------------------------------------------------------------------------
289// TileHeader (32 bytes)
290// ---------------------------------------------------------------------------
291
292/// Fixed 32-byte header preceding each compressed tile's payload.
293#[derive(Debug, Clone, Copy, PartialEq, Eq)]
294pub struct TileHeader {
295    /// Tile format version (initially 1). Decompressor rejects unknown versions.
296    pub version: u8,
297    pub flags: TileFlags,
298    pub sub_stream_count: u8,
299    pub compressed_size: u32,
300    pub uncompressed_size: u32,
301    /// CRC32 of the uncompressed tile data. 0 if checksums disabled.
302    pub checksum: u32,
303    /// Size of the sub-stream offset table in bytes.
304    pub sub_stream_offsets_size: u32,
305}
306
307impl TileHeader {
308    pub const SIZE: usize = 32;
309
310    /// Serialize to 32 bytes (little-endian).
311    #[must_use]
312    pub fn to_bytes(self) -> [u8; Self::SIZE] {
313        let mut b = [0u8; Self::SIZE];
314        b[0] = self.version;
315        b[1] = self.flags.0;
316        b[2] = self.sub_stream_count;
317        // b[3] reserved
318        b[4..8].copy_from_slice(&self.compressed_size.to_le_bytes());
319        b[8..12].copy_from_slice(&self.uncompressed_size.to_le_bytes());
320        b[12..16].copy_from_slice(&self.checksum.to_le_bytes());
321        b[16..20].copy_from_slice(&self.sub_stream_offsets_size.to_le_bytes());
322        // b[20..32] reserved
323        b
324    }
325
326    /// Deserialize from 32 bytes.
327    #[must_use]
328    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Self {
329        Self {
330            version: b[0],
331            flags: TileFlags(b[1]),
332            sub_stream_count: b[2],
333            compressed_size: u32::from_le_bytes([b[4], b[5], b[6], b[7]]),
334            uncompressed_size: u32::from_le_bytes([b[8], b[9], b[10], b[11]]),
335            checksum: u32::from_le_bytes([b[12], b[13], b[14], b[15]]),
336            sub_stream_offsets_size: u32::from_le_bytes([b[16], b[17], b[18], b[19]]),
337        }
338    }
339}
340
341// ---------------------------------------------------------------------------
342// TileIndexEntry (24 bytes)
343// ---------------------------------------------------------------------------
344
/// One entry per tile in the trailing index.
///
/// Serialized layout (little-endian): `tile_offset` in bytes 0..8, followed
/// by `compressed_size`, `uncompressed_size`, `checksum` and `flags` as
/// consecutive `u32` values in bytes 8..24.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TileIndexEntry {
    /// Absolute byte offset of the [`TileHeader`] from start of file.
    pub tile_offset: u64,
    pub compressed_size: u32,
    pub uncompressed_size: u32,
    pub checksum: u32,
    pub flags: u32,
}

impl TileIndexEntry {
    /// Serialized size of one index entry in bytes.
    pub const SIZE: usize = 24;

    /// Serialize to 24 bytes (little-endian).
    #[must_use]
    pub fn to_bytes(self) -> [u8; Self::SIZE] {
        let mut out = [0u8; Self::SIZE];
        let (offset_part, word_part) = out.split_at_mut(8);
        offset_part.copy_from_slice(&self.tile_offset.to_le_bytes());

        let words = [
            self.compressed_size,
            self.uncompressed_size,
            self.checksum,
            self.flags,
        ];
        for (slot, word) in word_part.chunks_exact_mut(4).zip(words.iter()) {
            slot.copy_from_slice(&word.to_le_bytes());
        }
        out
    }

    /// Deserialize from 24 bytes (inverse of [`TileIndexEntry::to_bytes`]).
    #[must_use]
    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Self {
        let mut offset_raw = [0u8; 8];
        offset_raw.copy_from_slice(&b[..8]);

        let u32_at = |at: usize| {
            let mut raw = [0u8; 4];
            raw.copy_from_slice(&b[at..at + 4]);
            u32::from_le_bytes(raw)
        };

        Self {
            tile_offset: u64::from_le_bytes(offset_raw),
            compressed_size: u32_at(8),
            uncompressed_size: u32_at(12),
            checksum: u32_at(16),
            flags: u32_at(20),
        }
    }
}
383
384// ---------------------------------------------------------------------------
385// TileIndexHeader (8 bytes)
386// ---------------------------------------------------------------------------
387
/// 8-byte header immediately before the tile index entries.
///
/// Serialized as two little-endian `u32` values: `entry_count` then
/// `index_flags`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TileIndexHeader {
    pub entry_count: u32,
    /// Reserved — must be 0.
    pub index_flags: u32,
}

impl TileIndexHeader {
    /// Serialized size of the index header in bytes.
    pub const SIZE: usize = 8;

    /// Serialize to 8 bytes (little-endian).
    #[must_use]
    pub fn to_bytes(self) -> [u8; Self::SIZE] {
        let [c0, c1, c2, c3] = self.entry_count.to_le_bytes();
        let [f0, f1, f2, f3] = self.index_flags.to_le_bytes();
        [c0, c1, c2, c3, f0, f1, f2, f3]
    }

    /// Deserialize from 8 bytes. `index_flags` is read as-is, not checked.
    #[must_use]
    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Self {
        let [c0, c1, c2, c3, f0, f1, f2, f3] = *b;
        Self {
            entry_count: u32::from_le_bytes([c0, c1, c2, c3]),
            index_flags: u32::from_le_bytes([f0, f1, f2, f3]),
        }
    }
}
415
416// ---------------------------------------------------------------------------
417// GpuFileFooter (24 bytes)
418// ---------------------------------------------------------------------------
419
420/// Fixed 24-byte record at the very end of every GPU archive.
421#[derive(Debug, Clone, Copy, PartialEq, Eq)]
422pub struct GpuFileFooter {
423    /// Absolute byte offset of the [`TileIndexHeader`] from start of file.
424    pub index_offset: u64,
425    /// Byte length of the index region.
426    pub index_size: u32,
427    /// CRC32 of bytes [0..12] of this footer.
428    pub footer_checksum: u32,
429    /// Redundant copy of format version.
430    pub format_version: u32,
431    pub magic: [u8; 4],
432}
433
434impl GpuFileFooter {
435    pub const SIZE: usize = 24;
436
437    /// Build and compute the footer checksum.
438    #[must_use]
439    pub fn new(index_offset: u64, index_size: u32) -> Self {
440        let mut f = Self {
441            index_offset,
442            index_size,
443            footer_checksum: 0,
444            format_version: FORMAT_VERSION,
445            magic: CGPU_MAGIC,
446        };
447        f.footer_checksum = f.compute_checksum();
448        f
449    }
450
451    fn compute_checksum(self) -> u32 {
452        let b = self.to_bytes_unchecked();
453        let mut h = Hasher::new();
454        h.update(&b[0..12]);
455        h.finalize()
456    }
457
458    fn to_bytes_unchecked(self) -> [u8; Self::SIZE] {
459        let mut b = [0u8; Self::SIZE];
460        b[0..8].copy_from_slice(&self.index_offset.to_le_bytes());
461        b[8..12].copy_from_slice(&self.index_size.to_le_bytes());
462        b[12..16].copy_from_slice(&self.footer_checksum.to_le_bytes());
463        b[16..20].copy_from_slice(&self.format_version.to_le_bytes());
464        b[20..24].copy_from_slice(&self.magic);
465        b
466    }
467
468    /// Serialize to 24 bytes.
469    #[must_use]
470    pub fn to_bytes(self) -> [u8; Self::SIZE] {
471        self.to_bytes_unchecked()
472    }
473
474    /// Deserialize and validate a 24-byte footer.
475    ///
476    /// # Errors
477    ///
478    /// - [`CrushError::InvalidFormat`] if magic is wrong or checksum fails.
479    /// - [`CrushError::VersionMismatch`] if format version differs.
480    pub fn from_bytes(b: &[u8; Self::SIZE]) -> Result<Self> {
481        let magic = [b[20], b[21], b[22], b[23]];
482        if magic != CGPU_MAGIC {
483            return Err(CrushError::InvalidFormat(format!(
484                "footer magic {magic:?} does not match CGPU"
485            )));
486        }
487        let format_version = u32::from_le_bytes([b[16], b[17], b[18], b[19]]);
488        if format_version != FORMAT_VERSION {
489            return Err(CrushError::VersionMismatch {
490                file_version: format!("format v{format_version}"),
491                current_version: format!("format v{FORMAT_VERSION} (engine {ENGINE_VERSION_STR})"),
492            });
493        }
494        let footer = Self {
495            index_offset: u64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]),
496            index_size: u32::from_le_bytes([b[8], b[9], b[10], b[11]]),
497            footer_checksum: u32::from_le_bytes([b[12], b[13], b[14], b[15]]),
498            format_version,
499            magic,
500        };
501        let expected = {
502            let mut h = Hasher::new();
503            h.update(&b[0..12]);
504            h.finalize()
505        };
506        if footer.footer_checksum != expected {
507            return Err(CrushError::IndexCorrupted(format!(
508                "GPU footer checksum mismatch: expected {expected:#010x}, got {:#010x}",
509                footer.footer_checksum
510            )));
511        }
512        Ok(footer)
513    }
514}
515
516/// Compute padding needed to align a size to the tile alignment boundary.
517#[must_use]
518pub fn padding_to_alignment(size: usize) -> usize {
519    let remainder = size % TILE_ALIGNMENT;
520    if remainder == 0 {
521        0
522    } else {
523        TILE_ALIGNMENT - remainder
524    }
525}