Skip to main content

haagenti_zstd/frame/
header.rs

1//! Zstd frame header parsing.
2//!
3//! The frame header contains the frame descriptor and optional fields.
4
5use haagenti_core::{Error, Result};
6
/// Frame header descriptor byte.
///
/// ```text
/// Bit 7-6: Frame_Content_Size_flag
/// Bit 5:   Single_Segment_flag
/// Bit 4:   Unused_bit (encoders write 0; ignored when parsing)
/// Bit 3:   Reserved_bit (must be 0)
/// Bit 2:   Content_Checksum_flag
/// Bit 1-0: Dictionary_ID_flag
/// ```
///
/// The byte is stored verbatim and the individual flags are decoded on
/// demand by the accessor methods. Only the Reserved_bit is validated when a
/// descriptor is constructed; the Unused_bit is deliberately not inspected.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FrameDescriptor {
    /// Raw descriptor byte, exactly as read from the frame header.
    raw: u8,
}
22
23impl FrameDescriptor {
24    /// Parse a frame descriptor from a byte.
25    pub fn new(byte: u8) -> Result<Self> {
26        // Check reserved bit (bit 3)
27        if byte & 0x08 != 0 {
28            return Err(Error::corrupted(
29                "Reserved bit in frame descriptor must be 0",
30            ));
31        }
32
33        Ok(Self { raw: byte })
34    }
35
36    /// Get the Frame_Content_Size_flag (bits 7-6).
37    /// Returns the number of bytes used for frame content size:
38    /// 0 = 0 bytes, 1 = 1 byte, 2 = 2 bytes, 3 = 8 bytes
39    #[inline]
40    pub fn frame_content_size_flag(&self) -> u8 {
41        (self.raw >> 6) & 0x03
42    }
43
44    /// Get the number of bytes for the frame content size field.
45    pub fn frame_content_size_bytes(&self) -> usize {
46        match self.frame_content_size_flag() {
47            0 => {
48                if self.single_segment_flag() {
49                    1 // Single segment mode uses 1 byte
50                } else {
51                    0 // No FCS field
52                }
53            }
54            1 => 2,
55            2 => 4,
56            3 => 8,
57            _ => unreachable!(),
58        }
59    }
60
61    /// Get the Single_Segment_flag (bit 5).
62    /// When set, window size is derived from frame content size.
63    #[inline]
64    pub fn single_segment_flag(&self) -> bool {
65        (self.raw & 0x20) != 0
66    }
67
68    /// Get the Content_Checksum_flag (bit 2).
69    /// When set, a 4-byte XXHash64 checksum follows the last block.
70    #[inline]
71    pub fn content_checksum_flag(&self) -> bool {
72        (self.raw & 0x04) != 0
73    }
74
75    /// Get the Dictionary_ID_flag (bits 1-0).
76    /// Returns the number of bytes for dictionary ID: 0, 1, 2, or 4.
77    #[inline]
78    pub fn dictionary_id_flag(&self) -> u8 {
79        self.raw & 0x03
80    }
81
82    /// Get the number of bytes for the dictionary ID field.
83    pub fn dictionary_id_bytes(&self) -> usize {
84        match self.dictionary_id_flag() {
85            0 => 0,
86            1 => 1,
87            2 => 2,
88            3 => 4,
89            _ => unreachable!(),
90        }
91    }
92
93    /// Whether this frame requires a window descriptor byte.
94    #[inline]
95    pub fn has_window_descriptor(&self) -> bool {
96        !self.single_segment_flag()
97    }
98}
99
100/// Parsed Zstd frame header.
101#[derive(Debug, Clone, PartialEq, Eq)]
102pub struct FrameHeader {
103    /// Frame descriptor.
104    pub descriptor: FrameDescriptor,
105    /// Window size in bytes.
106    pub window_size: usize,
107    /// Dictionary ID (0 if not present).
108    pub dictionary_id: u32,
109    /// Frame content size (None if not present).
110    pub frame_content_size: Option<u64>,
111    /// Whether content checksum is present.
112    pub has_checksum: bool,
113    /// Total header size in bytes (including magic number).
114    pub header_size: usize,
115}
116
117impl FrameHeader {
118    /// Parse a frame header from the input buffer.
119    ///
120    /// The buffer should start at the frame descriptor (after magic number).
121    pub fn parse(data: &[u8]) -> Result<Self> {
122        if data.is_empty() {
123            return Err(Error::corrupted("Empty frame header"));
124        }
125
126        let descriptor = FrameDescriptor::new(data[0])?;
127        let mut offset = 1;
128
129        // Parse window descriptor if present
130        let window_size = if descriptor.has_window_descriptor() {
131            if data.len() < offset + 1 {
132                return Err(Error::corrupted(
133                    "Frame header truncated at window descriptor",
134                ));
135            }
136            let window_byte = data[offset];
137            offset += 1;
138            Self::decode_window_size(window_byte)?
139        } else {
140            // Will be determined from frame content size
141            0
142        };
143
144        // Parse dictionary ID if present
145        let dict_bytes = descriptor.dictionary_id_bytes();
146        let dictionary_id = if dict_bytes > 0 {
147            if data.len() < offset + dict_bytes {
148                return Err(Error::corrupted("Frame header truncated at dictionary ID"));
149            }
150            let dict_id = Self::read_le_uint(&data[offset..], dict_bytes)?;
151            offset += dict_bytes;
152            dict_id as u32
153        } else {
154            0
155        };
156
157        // Parse frame content size if present
158        let fcs_bytes = descriptor.frame_content_size_bytes();
159        let frame_content_size = if fcs_bytes > 0 {
160            if data.len() < offset + fcs_bytes {
161                return Err(Error::corrupted(
162                    "Frame header truncated at frame content size",
163                ));
164            }
165            let mut fcs = Self::read_le_uint(&data[offset..], fcs_bytes)?;
166            // For 2-byte FCS, add 256
167            if fcs_bytes == 2 {
168                fcs += 256;
169            }
170            offset += fcs_bytes;
171            Some(fcs)
172        } else {
173            None
174        };
175
176        // Determine final window size
177        let final_window_size = if descriptor.single_segment_flag() {
178            frame_content_size.unwrap_or(0) as usize
179        } else {
180            window_size
181        };
182
183        Ok(Self {
184            descriptor,
185            window_size: final_window_size,
186            dictionary_id,
187            frame_content_size,
188            has_checksum: descriptor.content_checksum_flag(),
189            header_size: 4 + offset, // 4 bytes for magic + header bytes
190        })
191    }
192
193    /// Decode window size from the window descriptor byte.
194    fn decode_window_size(byte: u8) -> Result<usize> {
195        let exponent = (byte >> 3) as u32;
196        let mantissa = (byte & 0x07) as usize;
197
198        if exponent > 41 {
199            return Err(Error::corrupted(format!(
200                "Window size exponent {} exceeds maximum",
201                exponent
202            )));
203        }
204
205        // window_base = 1 << (10 + exponent)
206        // window_add = (window_base / 8) * mantissa
207        // window_size = window_base + window_add
208
209        let window_base = 1usize << (10 + exponent);
210        let window_add = (window_base >> 3) * mantissa;
211        let window_size = window_base + window_add;
212
213        if window_size > super::MAX_WINDOW_SIZE {
214            return Err(Error::corrupted(format!(
215                "Window size {} exceeds maximum {}",
216                window_size,
217                super::MAX_WINDOW_SIZE
218            )));
219        }
220
221        Ok(window_size)
222    }
223
224    /// Read a little-endian unsigned integer of the given size.
225    fn read_le_uint(data: &[u8], size: usize) -> Result<u64> {
226        if data.len() < size {
227            return Err(Error::corrupted("Insufficient data for integer"));
228        }
229
230        let mut result = 0u64;
231        for (i, &byte) in data.iter().enumerate().take(size) {
232            result |= (byte as u64) << (8 * i);
233        }
234        Ok(result)
235    }
236}
237
238// =============================================================================
239// Tests
240// =============================================================================
241
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a descriptor from a byte that is expected to be valid.
    fn descriptor(byte: u8) -> FrameDescriptor {
        FrameDescriptor::new(byte).unwrap()
    }

    /// Parses a header that is expected to be valid.
    fn parse_ok(bytes: &[u8]) -> FrameHeader {
        FrameHeader::parse(bytes).unwrap()
    }

    #[test]
    fn test_frame_descriptor_flags() {
        // Every flag set: FCS flag 3 (bits 7-6), single segment (bit 5),
        // checksum (bit 2), dictionary ID flag 3 (bits 1-0).
        let all_set = descriptor(0b11100111);
        assert_eq!(all_set.frame_content_size_flag(), 3);
        assert!(all_set.single_segment_flag());
        assert!(all_set.content_checksum_flag());
        assert_eq!(all_set.dictionary_id_flag(), 3);
    }

    #[test]
    fn test_frame_descriptor_reserved_bit_error() {
        // A set reserved bit (bit 3) must be rejected.
        assert!(FrameDescriptor::new(0b00001000).is_err());
    }

    #[test]
    fn test_frame_descriptor_fcs_bytes() {
        // (descriptor byte, expected FCS field width)
        let cases: [(u8, usize); 5] = [
            (0b00000000, 0), // FCS flag 0, not single segment
            (0b00100000, 1), // FCS flag 0, single segment
            (0b01000000, 2), // FCS flag 1
            (0b10000000, 4), // FCS flag 2
            (0b11000000, 8), // FCS flag 3
        ];
        for &(byte, width) in cases.iter() {
            assert_eq!(descriptor(byte).frame_content_size_bytes(), width);
        }
    }

    #[test]
    fn test_frame_descriptor_dict_bytes() {
        // (descriptor byte, expected dictionary ID field width)
        let cases: [(u8, usize); 4] = [
            (0b00000000, 0),
            (0b00000001, 1),
            (0b00000010, 2),
            (0b00000011, 4),
        ];
        for &(byte, width) in cases.iter() {
            assert_eq!(descriptor(byte).dictionary_id_bytes(), width);
        }
    }

    #[test]
    fn test_window_descriptor_has() {
        // Without the single-segment flag a window descriptor is expected.
        assert!(descriptor(0b00000000).has_window_descriptor());

        // With the single-segment flag there is none.
        assert!(!descriptor(0b00100000).has_window_descriptor());
    }

    #[test]
    fn test_frame_header_minimal() {
        // Smallest header: descriptor 0x20 (single segment, 1-byte FCS, no
        // dictionary, no checksum) followed by an FCS byte of 0.
        let parsed = parse_ok(&[0x20, 0x00]);

        assert!(parsed.descriptor.single_segment_flag());
        assert_eq!(parsed.frame_content_size, Some(0));
        assert_eq!(parsed.dictionary_id, 0);
        assert!(!parsed.has_checksum);
        assert_eq!(parsed.header_size, 4 + 2); // magic + descriptor + fcs
    }

    #[test]
    fn test_frame_header_with_window() {
        // Descriptor 0x00 (window descriptor present, no FCS, no dictionary,
        // no checksum); window byte 0x00 is exponent 0, mantissa 0 -> 1KB.
        let parsed = parse_ok(&[0x00, 0x00]);

        assert_eq!(parsed.window_size, 1024);
        assert_eq!(parsed.frame_content_size, None);
        assert_eq!(parsed.header_size, 4 + 2);
    }

    #[test]
    fn test_frame_header_with_dictionary() {
        // Descriptor 0x23 (single segment, 1-byte FCS, 4-byte dictionary ID),
        // dictionary ID 0x12345678 in little-endian order, then FCS 0x10.
        let parsed = parse_ok(&[0x23, 0x78, 0x56, 0x34, 0x12, 0x10]);

        assert_eq!(parsed.dictionary_id, 0x12345678);
        assert_eq!(parsed.frame_content_size, Some(0x10));
    }

    #[test]
    fn test_frame_header_with_checksum() {
        // Descriptor 0x24 (single segment, checksum) with FCS = 0.
        let parsed = parse_ok(&[0x24, 0x00]);

        assert!(parsed.has_checksum);
    }

    #[test]
    fn test_frame_header_2byte_fcs() {
        // Descriptor 0x60 (single segment, 2-byte FCS). The stored value
        // 0x0100 (little-endian) is 256, and the 2-byte form adds 256.
        let parsed = parse_ok(&[0x60, 0x00, 0x01]);

        assert_eq!(parsed.frame_content_size, Some(256 + 256));
    }

    #[test]
    fn test_frame_header_8byte_fcs() {
        // Descriptor 0xE0 (single segment, 8-byte FCS).
        let bytes = [
            0xE0, // descriptor
            0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, // FCS
        ];
        let parsed = parse_ok(&bytes);

        assert_eq!(parsed.frame_content_size, Some(0x0807060504030201));
    }

    #[test]
    fn test_window_size_decoding() {
        // (window descriptor byte, expected window size)
        let cases: [(u8, usize); 4] = [
            (0x00, 1024),              // exponent 0, mantissa 0: 1KB
            (0x07, 1024 + 896),        // exponent 0, mantissa 7: 1KB + 7/8 KB
            (0x50, 1024 * 1024),       // exponent 10, mantissa 0: 1MB
            (0x88, 128 * 1024 * 1024), // exponent 17, mantissa 0: 128MB (max)
        ];
        for &(byte, size) in cases.iter() {
            assert_eq!(FrameHeader::decode_window_size(byte).unwrap(), size);
        }
    }

    #[test]
    fn test_window_size_too_large() {
        // 0x90 is exponent 18, mantissa 0: 256MB, above the maximum.
        assert!(FrameHeader::decode_window_size(0x90).is_err());
    }

    #[test]
    fn test_empty_header_error() {
        assert!(FrameHeader::parse(&[]).is_err());
    }

    #[test]
    fn test_truncated_header_error() {
        // The descriptor announces a window byte that is not there.
        assert!(FrameHeader::parse(&[0x00]).is_err());
    }
}