structured_zstd/encoding/frame_emit_info.rs
1//! Structural metadata describing the layout of an emitted zstd frame.
2//!
3//! Surfaced via [`FrameCompressor::last_frame_emit_info`] (encode side)
4//! after every successful `compress()`. Lets storage-format consumers
5//! discover where each Block_Header / block body / optional content
6//! checksum lands in the byte buffer without re-parsing the frame
7//! themselves.
8//!
9//! Gated behind the `lsm` Cargo feature (default off) — the
10//! `FrameCompressor` field that stores this info, the methods that
11//! return it, and these public types only exist when the feature is
12//! enabled. Without `lsm` the C FFI surface stays strict drop-in for
13//! donor `libzstd` v1.5.7.
14//!
15//! [`FrameCompressor::last_frame_emit_info`]: super::FrameCompressor::last_frame_emit_info
16
17extern crate alloc;
18
19use alloc::vec::Vec;
20
21pub use crate::blocks::block::BlockType;
22
23/// Layout of a single zstd block inside an emitted frame.
24///
25/// Offsets are absolute byte positions in the emitted-frame buffer:
26/// `offset_in_frame` points at the first byte of the 3-byte
27/// `Block_Header`, and the block body lives at
28/// `offset_in_frame + header_size .. offset_in_frame + header_size +
29/// body_size`. The arithmetic
30/// `offset_in_frame + header_size as u32 + body_size`
31/// is the byte offset of the next block (or, on the last block, of
32/// the trailing checksum / end of frame).
33///
34/// For RLE blocks the `body_size` is `1` (the single repeated byte
35/// on the wire); the spec's `Block_Size` field carries the logical
36/// repeat count instead and is surfaced separately as
37/// [`block_size_field`](Self::block_size_field).
38#[derive(Debug, Clone, PartialEq, Eq)]
39pub struct FrameBlock {
40 /// Byte offset of this block's `Block_Header` within the emitted
41 /// frame buffer (frame-absolute, includes the bytes consumed by
42 /// the frame header / magic / FCS that precede the first block).
43 pub offset_in_frame: u32,
44 /// Size of the `Block_Header` in bytes. Always `3` today; carried
45 /// as a field so the API stays forward-compatible with any future
46 /// spec extension that widens the header.
47 pub header_size: u8,
48 /// Physical length of this block's body in bytes on the wire (does
49 /// NOT include `header_size`). For Raw / Compressed blocks this is
50 /// the number of bytes after the header; for RLE blocks this is
51 /// always `1` (the repeated byte itself, while the spec's
52 /// `Block_Size` field encodes the logical repeat count โ see
53 /// [`block_size_field`](Self::block_size_field)). The arithmetic
54 /// `offset_in_frame + header_size as u32 + body_size` always
55 /// lands on the next block boundary.
56 pub body_size: u32,
57 /// Raw `Block_Size` value from the 3-byte `Block_Header`. For Raw
58 /// and Compressed blocks this equals `body_size`; for RLE blocks
59 /// it's the logical repeat count (how many bytes the single
60 /// physical body byte expands to during decode) and will differ
61 /// from `body_size` (which is `1`).
62 pub block_size_field: u32,
63 /// Whether the block is Raw, RLE, or Compressed per RFC 8878
64 /// ยง3.1.1.2.1 (`Block_Type`).
65 pub block_type: BlockType,
66 /// `true` only on the final block of the frame (matches the
67 /// `Last_Block` flag in `Block_Header`).
68 pub last_block: bool,
69 /// Decompressed (regenerated) size of this block's output in bytes.
70 ///
71 /// For Raw and RLE blocks this is recoverable from the wire
72 /// (`block_size_field`), but a Compressed block's regenerated size is
73 /// NOT in its `Block_Header` (the header's `Block_Size` is the
74 /// *compressed* length), so the encoder captures it from the input
75 /// chunk that produced the block. Consumers map a decompressed byte
76 /// offset to a block index via the prefix sum of this field; see
77 /// [`FrameEmitInfo::decompressed_byte_range`].
78 ///
79 /// On the decode error path ([`FailedToReadBlockBodyAt`]), where the
80 /// regenerated size of a failed Compressed block is unknown, this is
81 /// `0` for Compressed blocks (Raw/RLE still carry their wire size).
82 ///
83 /// [`FailedToReadBlockBodyAt`]: crate::decoding::errors::FrameDecoderError::FailedToReadBlockBodyAt
84 pub decompressed_size: u32,
85}
86
87/// Complete layout of an emitted zstd frame.
88///
89/// Captures the byte positions of the frame header, every block, and
90/// the optional trailing content checksum. The ranges are `u32` byte
91/// offsets into the emitted buffer (`compressed_data` sink of
92/// [`FrameCompressor`]).
93///
94/// [`FrameCompressor`]: super::FrameCompressor
95#[derive(Debug, Clone, PartialEq, Eq)]
96pub struct FrameEmitInfo {
97 /// Byte range of the frame header (magic number + frame-header
98 /// fields). For magicless frames the magic is omitted but the
99 /// range still starts at offset 0.
100 pub frame_header_range: core::ops::Range<u32>,
101 /// One entry per emitted block, in stream order. The last entry
102 /// has `last_block = true`.
103 pub blocks: Vec<FrameBlock>,
104 /// Byte range of the trailing 4-byte content checksum (XXH64
105 /// truncated to low 32 bits). `None` if the frame was emitted
106 /// without `content_checksum`.
107 pub checksum_range: Option<core::ops::Range<u32>>,
108 /// Total emitted frame size in bytes (one past the last byte of
109 /// the frame).
110 pub total_size: u32,
111}
112
113impl FrameEmitInfo {
114 /// Half-open decompressed byte range `[start, end)` of `blocks[block_index]`
115 /// within the frame's full decompressed output, computed as the prefix
116 /// sum of every preceding block's [`FrameBlock::decompressed_size`].
117 ///
118 /// This is the mapping a range-query consumer uses to turn a
119 /// decompressed byte offset into the inner-block index needed by
120 /// [`FrameDecoder::decode_blocks_partial`]: find the first block whose
121 /// range contains the offset.
122 ///
123 /// Returns `None` if `block_index` is out of bounds.
124 ///
125 /// [`FrameDecoder::decode_blocks_partial`]: crate::decoding::FrameDecoder::decode_blocks_partial
126 ///
127 /// # Examples
128 ///
129 /// ```
130 /// # #[cfg(feature = "lsm")] {
131 /// use structured_zstd::encoding::frame_emit_info::{FrameBlock, FrameEmitInfo, BlockType};
132 /// let info = FrameEmitInfo {
133 /// frame_header_range: 0..6,
134 /// blocks: vec![
135 /// FrameBlock { offset_in_frame: 6, header_size: 3, body_size: 10,
136 /// block_size_field: 10, block_type: BlockType::Compressed,
137 /// last_block: false, decompressed_size: 100 },
138 /// FrameBlock { offset_in_frame: 19, header_size: 3, body_size: 20,
139 /// block_size_field: 20, block_type: BlockType::Compressed,
140 /// last_block: true, decompressed_size: 40 },
141 /// ],
142 /// checksum_range: None,
143 /// total_size: 42,
144 /// };
145 /// assert_eq!(info.decompressed_byte_range(0), Some(0..100));
146 /// assert_eq!(info.decompressed_byte_range(1), Some(100..140));
147 /// assert_eq!(info.decompressed_byte_range(2), None);
148 /// # }
149 /// ```
150 pub fn decompressed_byte_range(&self, block_index: usize) -> Option<core::ops::Range<u64>> {
151 let target = self.blocks.get(block_index)?;
152 // Prefix sum over preceding blocks. Block count is bounded by the
153 // frame's block count (each block is >= 3 wire bytes), so the
154 // accumulator stays well within u64.
155 let start: u64 = self.blocks[..block_index]
156 .iter()
157 .map(|b| u64::from(b.decompressed_size))
158 .sum();
159 Some(start..start + u64::from(target.decompressed_size))
160 }
161}