Skip to main content

structured_zstd/encoding/
frame_emit_info.rs

1//! Structural metadata describing the layout of an emitted zstd frame.
2//!
3//! Surfaced via [`FrameCompressor::last_frame_emit_info`] (encode side)
4//! after every successful `compress()`. Lets storage-format consumers
5//! discover where each Block_Header / block body / optional content
6//! checksum lands in the byte buffer without re-parsing the frame
7//! themselves.
8//!
9//! Gated behind the `lsm` Cargo feature (default off) — the
10//! `FrameCompressor` field that stores this info, the methods that
11//! return it, and these public types only exist when the feature is
12//! enabled. Without `lsm` the C FFI surface stays strict drop-in for
13//! donor `libzstd` v1.5.7.
14//!
15//! [`FrameCompressor::last_frame_emit_info`]: super::FrameCompressor::last_frame_emit_info
16
17extern crate alloc;
18
19use alloc::vec::Vec;
20
21pub use crate::blocks::block::BlockType;
22
23/// Layout of a single zstd block inside an emitted frame.
24///
25/// Offsets are absolute byte positions in the emitted-frame buffer:
26/// `offset_in_frame` points at the first byte of the 3-byte
27/// `Block_Header`, and the block body lives at
28/// `offset_in_frame + header_size .. offset_in_frame + header_size +
29/// body_size`. The arithmetic
30/// `offset_in_frame + header_size as u32 + body_size`
31/// is the byte offset of the next block (or, on the last block, of
32/// the trailing checksum / end of frame).
33///
34/// For RLE blocks the `body_size` is `1` (the single repeated byte
35/// on the wire); the spec's `Block_Size` field carries the logical
36/// repeat count instead and is surfaced separately as
37/// [`block_size_field`](Self::block_size_field).
38#[derive(Debug, Clone, PartialEq, Eq)]
39pub struct FrameBlock {
40    /// Byte offset of this block's `Block_Header` within the emitted
41    /// frame buffer (frame-absolute, includes the bytes consumed by
42    /// the frame header / magic / FCS that precede the first block).
43    pub offset_in_frame: u32,
44    /// Size of the `Block_Header` in bytes. Always `3` today; carried
45    /// as a field so the API stays forward-compatible with any future
46    /// spec extension that widens the header.
47    pub header_size: u8,
48    /// Physical length of this block's body in bytes on the wire (does
49    /// NOT include `header_size`). For Raw / Compressed blocks this is
50    /// the number of bytes after the header; for RLE blocks this is
51    /// always `1` (the repeated byte itself, while the spec's
52    /// `Block_Size` field encodes the logical repeat count โ€” see
53    /// [`block_size_field`](Self::block_size_field)). The arithmetic
54    /// `offset_in_frame + header_size as u32 + body_size` always
55    /// lands on the next block boundary.
56    pub body_size: u32,
57    /// Raw `Block_Size` value from the 3-byte `Block_Header`. For Raw
58    /// and Compressed blocks this equals `body_size`; for RLE blocks
59    /// it's the logical repeat count (how many bytes the single
60    /// physical body byte expands to during decode) and will differ
61    /// from `body_size` (which is `1`).
62    pub block_size_field: u32,
63    /// Whether the block is Raw, RLE, or Compressed per RFC 8878
64    /// ยง3.1.1.2.1 (`Block_Type`).
65    pub block_type: BlockType,
66    /// `true` only on the final block of the frame (matches the
67    /// `Last_Block` flag in `Block_Header`).
68    pub last_block: bool,
69    /// Decompressed (regenerated) size of this block's output in bytes.
70    ///
71    /// For Raw and RLE blocks this is recoverable from the wire
72    /// (`block_size_field`), but a Compressed block's regenerated size is
73    /// NOT in its `Block_Header` (the header's `Block_Size` is the
74    /// *compressed* length), so the encoder captures it from the input
75    /// chunk that produced the block. Consumers map a decompressed byte
76    /// offset to a block index via the prefix sum of this field; see
77    /// [`FrameEmitInfo::decompressed_byte_range`].
78    ///
79    /// On the decode error path ([`FailedToReadBlockBodyAt`]), where the
80    /// regenerated size of a failed Compressed block is unknown, this is
81    /// `0` for Compressed blocks (Raw/RLE still carry their wire size).
82    ///
83    /// [`FailedToReadBlockBodyAt`]: crate::decoding::errors::FrameDecoderError::FailedToReadBlockBodyAt
84    pub decompressed_size: u32,
85}
86
87/// Complete layout of an emitted zstd frame.
88///
89/// Captures the byte positions of the frame header, every block, and
90/// the optional trailing content checksum. The ranges are `u32` byte
91/// offsets into the emitted buffer (`compressed_data` sink of
92/// [`FrameCompressor`]).
93///
94/// [`FrameCompressor`]: super::FrameCompressor
95#[derive(Debug, Clone, PartialEq, Eq)]
96pub struct FrameEmitInfo {
97    /// Byte range of the frame header (magic number + frame-header
98    /// fields). For magicless frames the magic is omitted but the
99    /// range still starts at offset 0.
100    pub frame_header_range: core::ops::Range<u32>,
101    /// One entry per emitted block, in stream order. The last entry
102    /// has `last_block = true`.
103    pub blocks: Vec<FrameBlock>,
104    /// Byte range of the trailing 4-byte content checksum (XXH64
105    /// truncated to low 32 bits). `None` if the frame was emitted
106    /// without `content_checksum`.
107    pub checksum_range: Option<core::ops::Range<u32>>,
108    /// Total emitted frame size in bytes (one past the last byte of
109    /// the frame).
110    pub total_size: u32,
111}
112
113impl FrameEmitInfo {
114    /// Half-open decompressed byte range `[start, end)` of `blocks[block_index]`
115    /// within the frame's full decompressed output, computed as the prefix
116    /// sum of every preceding block's [`FrameBlock::decompressed_size`].
117    ///
118    /// This is the mapping a range-query consumer uses to turn a
119    /// decompressed byte offset into the inner-block index needed by
120    /// [`FrameDecoder::decode_blocks_partial`]: find the first block whose
121    /// range contains the offset.
122    ///
123    /// Returns `None` if `block_index` is out of bounds.
124    ///
125    /// [`FrameDecoder::decode_blocks_partial`]: crate::decoding::FrameDecoder::decode_blocks_partial
126    ///
127    /// # Examples
128    ///
129    /// ```
130    /// # #[cfg(feature = "lsm")] {
131    /// use structured_zstd::encoding::frame_emit_info::{FrameBlock, FrameEmitInfo, BlockType};
132    /// let info = FrameEmitInfo {
133    ///     frame_header_range: 0..6,
134    ///     blocks: vec![
135    ///         FrameBlock { offset_in_frame: 6, header_size: 3, body_size: 10,
136    ///             block_size_field: 10, block_type: BlockType::Compressed,
137    ///             last_block: false, decompressed_size: 100 },
138    ///         FrameBlock { offset_in_frame: 19, header_size: 3, body_size: 20,
139    ///             block_size_field: 20, block_type: BlockType::Compressed,
140    ///             last_block: true, decompressed_size: 40 },
141    ///     ],
142    ///     checksum_range: None,
143    ///     total_size: 42,
144    /// };
145    /// assert_eq!(info.decompressed_byte_range(0), Some(0..100));
146    /// assert_eq!(info.decompressed_byte_range(1), Some(100..140));
147    /// assert_eq!(info.decompressed_byte_range(2), None);
148    /// # }
149    /// ```
150    pub fn decompressed_byte_range(&self, block_index: usize) -> Option<core::ops::Range<u64>> {
151        let target = self.blocks.get(block_index)?;
152        // Prefix sum over preceding blocks. Block count is bounded by the
153        // frame's block count (each block is >= 3 wire bytes), so the
154        // accumulator stays well within u64.
155        let start: u64 = self.blocks[..block_index]
156            .iter()
157            .map(|b| u64::from(b.decompressed_size))
158            .sum();
159        Some(start..start + u64::from(target.decompressed_size))
160    }
161}