Skip to main content

haagenti_zstd/frame/
block.rs

1//! Zstd block header parsing.
2//!
3//! Each data block in a Zstd frame has a 3-byte header.
4
5use haagenti_core::{Error, Result};
6
/// Kind of a Zstd block, as encoded in the block header's type field.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BlockType {
    /// Uncompressed data, stored as-is.
    Raw,
    /// Run-length block: one byte, repeated.
    Rle,
    /// Data that was encoded with Zstd compression.
    Compressed,
    /// Reserved value. It is invalid and should not appear.
    Reserved,
}
19
20impl BlockType {
21    /// Parse block type from the type field (2 bits).
22    pub fn from_field(field: u8) -> Result<Self> {
23        match field {
24            0 => Ok(BlockType::Raw),
25            1 => Ok(BlockType::Rle),
26            2 => Ok(BlockType::Compressed),
27            3 => Err(Error::corrupted("Reserved block type")),
28            _ => unreachable!(),
29        }
30    }
31}
32
33/// Parsed block header.
34#[derive(Debug, Clone, Copy, PartialEq, Eq)]
35pub struct BlockHeader {
36    /// Whether this is the last block in the frame.
37    pub last_block: bool,
38    /// Block type.
39    pub block_type: BlockType,
40    /// Block size in bytes.
41    /// For Raw: size of uncompressed data
42    /// For RLE: decompressed size (compressed is 1 byte)
43    /// For Compressed: size of compressed data
44    pub block_size: usize,
45}
46
47impl BlockHeader {
48    /// Block header size in bytes.
49    pub const SIZE: usize = 3;
50
51    /// Maximum block size (128 KB - 1).
52    pub const MAX_BLOCK_SIZE: usize = (1 << 17) - 1;
53
54    /// Parse a block header from 3 bytes.
55    ///
56    /// ```text
57    /// Byte 0-2 (little-endian):
58    ///   Bit 0:     Last_Block flag
59    ///   Bits 1-2:  Block_Type
60    ///   Bits 3-23: Block_Size (21 bits)
61    /// ```
62    pub fn parse(data: &[u8]) -> Result<Self> {
63        if data.len() < Self::SIZE {
64            return Err(Error::corrupted(format!(
65                "Block header too short: {} bytes, need {}",
66                data.len(),
67                Self::SIZE
68            )));
69        }
70
71        // Read 3 bytes as little-endian 24-bit integer
72        let header = data[0] as u32 | ((data[1] as u32) << 8) | ((data[2] as u32) << 16);
73
74        let last_block = (header & 0x01) != 0;
75        let block_type_field = ((header >> 1) & 0x03) as u8;
76        let block_size = (header >> 3) as usize;
77
78        let block_type = BlockType::from_field(block_type_field)?;
79
80        if block_size > Self::MAX_BLOCK_SIZE {
81            return Err(Error::corrupted(format!(
82                "Block size {} exceeds maximum {}",
83                block_size,
84                Self::MAX_BLOCK_SIZE
85            )));
86        }
87
88        Ok(Self {
89            last_block,
90            block_type,
91            block_size,
92        })
93    }
94
95    /// Get the size of compressed data to read.
96    /// For RLE blocks, this is 1 (the byte to repeat).
97    pub fn compressed_size(&self) -> usize {
98        match self.block_type {
99            BlockType::Raw => self.block_size,
100            BlockType::Rle => 1,
101            BlockType::Compressed => self.block_size,
102            BlockType::Reserved => 0,
103        }
104    }
105
106    /// Get the size of decompressed output.
107    pub fn decompressed_size(&self) -> usize {
108        self.block_size
109    }
110}
111
112// =============================================================================
113// Tests
114// =============================================================================
115
116#[cfg(test)]
117mod tests {
118    use super::*;
119
120    #[test]
121    fn test_block_type_parsing() {
122        assert_eq!(BlockType::from_field(0).unwrap(), BlockType::Raw);
123        assert_eq!(BlockType::from_field(1).unwrap(), BlockType::Rle);
124        assert_eq!(BlockType::from_field(2).unwrap(), BlockType::Compressed);
125        assert!(BlockType::from_field(3).is_err());
126    }
127
128    #[test]
129    fn test_raw_block_header() {
130        // Raw block, not last, size = 100
131        // Encoding: last=0, type=00, size=100
132        // Header = (100 << 3) | (0 << 1) | 0 = 800 = 0x320
133        // Little-endian: [0x20, 0x03, 0x00]
134        let data = [0x20, 0x03, 0x00];
135        let header = BlockHeader::parse(&data).unwrap();
136
137        assert!(!header.last_block);
138        assert_eq!(header.block_type, BlockType::Raw);
139        assert_eq!(header.block_size, 100);
140        assert_eq!(header.compressed_size(), 100);
141        assert_eq!(header.decompressed_size(), 100);
142    }
143
144    #[test]
145    fn test_rle_block_header() {
146        // RLE block, last block, size = 1000
147        // Encoding: last=1, type=01, size=1000
148        // Header = (1000 << 3) | (1 << 1) | 1 = 8003 = 0x1F43
149        // Little-endian: [0x43, 0x1F, 0x00]
150        let data = [0x43, 0x1F, 0x00];
151        let header = BlockHeader::parse(&data).unwrap();
152
153        assert!(header.last_block);
154        assert_eq!(header.block_type, BlockType::Rle);
155        assert_eq!(header.block_size, 1000);
156        assert_eq!(header.compressed_size(), 1); // RLE is always 1 byte compressed
157        assert_eq!(header.decompressed_size(), 1000);
158    }
159
160    #[test]
161    fn test_compressed_block_header() {
162        // Compressed block, not last, size = 50000
163        // Encoding: last=0, type=10, size=50000
164        // Header = (50000 << 3) | (2 << 1) | 0 = 400004 = 0x61A84
165        // Little-endian: [0x84, 0x1A, 0x06]
166        let data = [0x84, 0x1A, 0x06];
167        let header = BlockHeader::parse(&data).unwrap();
168
169        assert!(!header.last_block);
170        assert_eq!(header.block_type, BlockType::Compressed);
171        assert_eq!(header.block_size, 50000);
172        assert_eq!(header.compressed_size(), 50000);
173    }
174
175    #[test]
176    fn test_last_block_flag() {
177        // Same as raw block but with last_block = 1
178        // Header = (100 << 3) | (0 << 1) | 1 = 801 = 0x321
179        let data = [0x21, 0x03, 0x00];
180        let header = BlockHeader::parse(&data).unwrap();
181
182        assert!(header.last_block);
183        assert_eq!(header.block_type, BlockType::Raw);
184        assert_eq!(header.block_size, 100);
185    }
186
187    #[test]
188    fn test_max_block_size() {
189        // Maximum size: 2^17 - 1 = 131071
190        // Header = (131071 << 3) | (0 << 1) | 0 = 1048568 = 0xFFFF8
191        let data = [0xF8, 0xFF, 0x0F];
192        let header = BlockHeader::parse(&data).unwrap();
193
194        assert_eq!(header.block_size, 131071);
195        assert_eq!(header.block_size, BlockHeader::MAX_BLOCK_SIZE);
196    }
197
198    #[test]
199    fn test_block_size_too_large() {
200        // Size = 131072 (one more than max)
201        // Header = (131072 << 3) | 0 = 1048576 = 0x100000
202        let data = [0x00, 0x00, 0x10];
203        let result = BlockHeader::parse(&data);
204        assert!(result.is_err());
205    }
206
207    #[test]
208    fn test_reserved_block_type_error() {
209        // Reserved block type (type = 3)
210        // Header = (0 << 3) | (3 << 1) | 0 = 6
211        let data = [0x06, 0x00, 0x00];
212        let result = BlockHeader::parse(&data);
213        assert!(result.is_err());
214    }
215
216    #[test]
217    fn test_header_too_short() {
218        let result = BlockHeader::parse(&[0x00, 0x00]);
219        assert!(result.is_err());
220    }
221
222    #[test]
223    fn test_zero_size_block() {
224        // Zero-size raw block
225        let data = [0x00, 0x00, 0x00];
226        let header = BlockHeader::parse(&data).unwrap();
227
228        assert_eq!(header.block_size, 0);
229        assert_eq!(header.compressed_size(), 0);
230    }
231}