1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
use crate::common::{MAGIC_NUM, MAX_WINDOW_SIZE, MIN_WINDOW_SIZE};
use crate::decoding::errors::{FrameDescriptorError, FrameHeaderError, ReadFrameHeaderError};
use crate::io::Read;
/// Parse a frame header from `r`, expecting the 4-byte magic number prefix.
///
/// Test-only convenience wrapper around
/// [`read_frame_header_with_format`]: it forwards `r` with
/// `magicless = false` and returns that function's result unchanged,
/// i.e. the parsed header together with the number of bytes consumed.
/// Production decoder paths route through
/// `read_frame_header_with_format` directly so that the magicless
/// bit is threaded explicitly; this wrapper keeps the existing
/// in-crate `tests/` call sites simple.
#[cfg(test)]
pub(crate) fn read_frame_header(r: impl Read) -> Result<(FrameHeader, u8), ReadFrameHeaderError> {
read_frame_header_with_format(r, false)
}
/// Parse one serialized frame header from `r`.
///
/// With `magicless == false` the first four bytes are read as a
/// little-endian magic number. A value inside the skippable-frame range
/// `0x184D2A50..=0x184D2A5F` makes the next four bytes be read as the
/// skippable frame's length, and both values are reported through
/// [`ReadFrameHeaderError::SkipFrame`]. Any other value that differs from
/// `MAGIC_NUM` yields [`ReadFrameHeaderError::BadMagicNumber`].
///
/// With `magicless == true` the 4-byte magic prefix is NOT consumed and
/// skippable-frame detection is bypassed — the caller MUST know
/// out-of-band that the stream is magicless. Donor parity:
/// `ZSTD_f_zstd1_magicless` via `ZSTD_d_format`.
///
/// After the optional magic number the fields are read in wire order: the
/// descriptor byte, the window descriptor byte (only when the
/// single-segment flag is clear), the dictionary ID and the frame content
/// size, the last two with the widths announced by the descriptor.
///
/// # Returns
///
/// The parsed [`FrameHeader`] and the number of bytes consumed from `r`,
/// which includes the magic number when it was read.
///
/// # Errors
///
/// A failed read is wrapped in the [`ReadFrameHeaderError`] variant that
/// names the field being read (the skippable-frame length is reported as
/// `FrameDescriptorReadError`). Errors from the descriptor's size lookups
/// are converted with `?`.
pub fn read_frame_header_with_format(
    mut r: impl Read,
    magicless: bool,
) -> Result<(FrameHeader, u8), ReadFrameHeaderError> {
    // Running total of bytes pulled from `r`. The longest possible header is
    // 4 + 1 + 1 + 4 + 8 = 18 bytes, so a `u8` cannot overflow here.
    let mut consumed: u8 = 0;

    if !magicless {
        let mut word = [0u8; 4];
        r.read_exact(&mut word)
            .map_err(ReadFrameHeaderError::MagicNumberReadError)?;
        consumed += 4;
        let magic = u32::from_le_bytes(word);

        // Skippable frames announce themselves with a magic number in this
        // interval; their length follows as another little-endian u32.
        if matches!(magic, 0x184D_2A50..=0x184D_2A5F) {
            r.read_exact(&mut word)
                .map_err(ReadFrameHeaderError::FrameDescriptorReadError)?;
            return Err(ReadFrameHeaderError::SkipFrame {
                magic_number: magic,
                length: u32::from_le_bytes(word),
            });
        }
        if magic != MAGIC_NUM {
            return Err(ReadFrameHeaderError::BadMagicNumber(magic));
        }
    }

    // Single-byte scratch space shared by the two one-byte fields.
    let mut byte = [0u8; 1];
    r.read_exact(&mut byte)
        .map_err(ReadFrameHeaderError::FrameDescriptorReadError)?;
    consumed += 1;
    let desc = FrameDescriptor(byte[0]);

    // The window descriptor is only on the wire for multi-segment frames;
    // otherwise the field keeps its default of zero.
    let window_descriptor = if desc.single_segment_flag() {
        0
    } else {
        r.read_exact(&mut byte)
            .map_err(ReadFrameHeaderError::WindowDescriptorReadError)?;
        consumed += 1;
        byte[0]
    };

    let did_len = desc.dictionary_id_bytes()?;
    let dict_id = if did_len == 0 {
        None
    } else {
        // Read into the low bytes of a zeroed buffer so a short field
        // decodes as a zero-extended little-endian integer.
        let mut raw = [0u8; 4];
        r.read_exact(&mut raw[..usize::from(did_len)])
            .map_err(ReadFrameHeaderError::DictionaryIdReadError)?;
        consumed += did_len;
        // An ID of zero is recorded the same way as a missing ID.
        match u32::from_le_bytes(raw) {
            0 => None,
            id => Some(id),
        }
    };

    let fcs_len = desc.frame_content_size_bytes()?;
    let frame_content_size = if fcs_len == 0 {
        0
    } else {
        let mut raw = [0u8; 8];
        r.read_exact(&mut raw[..usize::from(fcs_len)])
            .map_err(ReadFrameHeaderError::FrameContentSizeReadError)?;
        consumed += fcs_len;
        let value = u64::from_le_bytes(raw);
        // The 2-byte encoding stores the size minus 256.
        if fcs_len == 2 {
            value + 256
        } else {
            value
        }
    };

    let header = FrameHeader {
        descriptor: desc,
        dict_id,
        frame_content_size,
        window_descriptor,
    };
    Ok((header, consumed))
}
/// The parsed fields of a frame header, as filled in by
/// [`read_frame_header_with_format`].
///
/// On the wire a frame header has a variable size, with a minimum of 2 bytes and a maximum of
/// 14 bytes.
pub struct FrameHeader {
/// The `Frame_Header_Descriptor` byte; its flags announce which of the optional fields are
/// present in the header.
pub descriptor: FrameDescriptor,
/// The `Window_Descriptor` field contains the minimum size of a memory buffer needed to
/// decompress the entire frame.
///
/// This byte is not included in the frame header when the `Single_Segment_flag` is set; the
/// parser then leaves this field at `0`.
///
/// Bits 7-3 hold the `Exponent`, while bits 2-0 hold the `Mantissa`.
///
/// To determine the size of a window, the following formula can be used:
/// ```text
/// windowLog = 10 + Exponent;
/// windowBase = 1 << windowLog;
/// windowAdd = (windowBase / 8) * Mantissa;
/// Window_Size = windowBase + windowAdd;
/// ```
/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor>
window_descriptor: u8,
/// The `Dictionary_ID` field contains the ID of the dictionary to be used to decode the frame.
/// When this value is not present, it's up to the decoder to know which dictionary to use.
///
/// The parser stores `None` both when the field is absent and when it is present but zero.
dict_id: Option<u32>,
/// The size of the original/uncompressed content.
///
/// Stays at `0` when the header carries no `Frame_Content_Size` field. For the 2-byte
/// encoding the parser has already added the format's offset of 256.
frame_content_size: u64,
}
impl FrameHeader {
    /// Size in bytes of the window required to decode this frame.
    ///
    /// For a single-segment frame this is the frame content size, returned
    /// without any range check. Otherwise the size is derived from the
    /// `Window_Descriptor` byte (`Exponent` in bits 7-3, `Mantissa` in bits
    /// 2-0):
    ///
    /// ```text
    /// windowBase  = 1 << (10 + Exponent)
    /// Window_Size = windowBase + (windowBase / 8) * Mantissa
    /// ```
    ///
    /// # Errors
    ///
    /// * [`FrameHeaderError::WindowTooSmall`] when the derived size is below
    ///   `MIN_WINDOW_SIZE`.
    /// * [`FrameHeaderError::WindowTooBig`] when the derived size is at or
    ///   above `MAX_WINDOW_SIZE`.
    pub fn window_size(&self) -> Result<u64, FrameHeaderError> {
        if self.descriptor.single_segment_flag() {
            return Ok(self.frame_content_size());
        }

        let exponent = u64::from(self.window_descriptor >> 3);
        let mantissa = u64::from(self.window_descriptor & 0x7);
        // The exponent is at most 31, so the shift amount never exceeds 41.
        let base: u64 = 1 << (10 + exponent);
        let size = base + (base / 8) * mantissa;

        if size < MIN_WINDOW_SIZE {
            Err(FrameHeaderError::WindowTooSmall { got: size })
        } else if size >= MAX_WINDOW_SIZE {
            Err(FrameHeaderError::WindowTooBig { got: size })
        } else {
            Ok(size)
        }
    }

    /// The ID of the dictionary required to decode this frame, if the header
    /// recorded one.
    pub fn dictionary_id(&self) -> Option<u32> {
        self.dict_id
    }

    /// The uncompressed size (in bytes) of the frame contents, as stored in
    /// the header.
    pub fn frame_content_size(&self) -> u64 {
        self.frame_content_size
    }

    /// Whether the frame header carried an explicit `Frame_Content_Size`
    /// field on the wire.
    ///
    /// [`Self::frame_content_size`] is `0` both when the field is absent
    /// (FCS_flag=0 with `Single_Segment_flag=0`) and when a size of zero was
    /// explicitly declared (for example the 1-byte field of a single-segment
    /// frame holding `0`). Callers that need to know whether the value is
    /// actually a wire-format declaration (e.g. for post-decode size
    /// validation) should consult this method.
    #[allow(dead_code)]
    pub fn fcs_declared(&self) -> bool {
        // `frame_content_size_bytes()` reports a width of 0 only when
        // FCS_flag=0 and the single-segment flag is clear, i.e. exactly the
        // "no FCS on the wire" case; every other combination has a non-zero
        // width. The descriptor was already validated when the header was
        // parsed, so the `Err` case is not expected here; it is folded into
        // `false`, the safe "treat as absent" answer.
        matches!(self.descriptor.frame_content_size_bytes(), Ok(width) if width != 0)
    }

    /// Raw `Window_Descriptor` byte from the frame header
    /// (RFC 8878 §3.1.1.1.2 layout: `(exp << 3) | mantissa`), or `None` when
    /// the `Single_Segment_flag` is set.
    ///
    /// In single-segment frames the byte is absent from the wire (the
    /// `Window_Size` is derived from `Frame_Content_Size` instead) and the
    /// parser leaves the stored field at its default `0`. Returning `None`
    /// keeps callers from conflating "missing byte on the wire" with "byte
    /// present and equal to 0".
    ///
    /// The `frame` module is `pub(crate)`, so this method is reachable only
    /// from in-crate validation paths (e.g. the `lsm` feature's
    /// `expect_window_descriptor` setter); `allow(dead_code)` keeps default
    /// builds warning-free when no in-crate caller invokes it.
    #[allow(dead_code)]
    pub fn window_descriptor(&self) -> Option<u8> {
        if self.descriptor.single_segment_flag() {
            return None;
        }
        Some(self.window_descriptor)
    }
}
/// The first byte is called the `Frame Header Descriptor`, and it describes what other fields
/// are present.
///
/// The wrapped `u8` is the raw descriptor byte. The accessors on this type read it as follows:
///
/// | Bits | Accessor |
/// | -- | -- |
/// | 7-6 | [`Self::frame_content_size_flag`] |
/// | 5 | [`Self::single_segment_flag`] |
/// | 3 | [`Self::reserved_flag`] |
/// | 2 | [`Self::content_checksum_flag`] |
/// | 1-0 | [`Self::dict_id_flag`] |
///
/// Bit 4 is not read by any accessor.
pub struct FrameDescriptor(pub u8);
impl FrameDescriptor {
    /// The `Frame_Content_Size_flag`, stored in bits 7-6 of the descriptor.
    ///
    /// This 2-bit value determines how many bytes the `Frame_Content_Size`
    /// field occupies in the header.
    ///
    /// When the value is 0, `FCS_Field_Size` depends on the
    /// `Single_Segment_flag`: if that flag is set, the `Frame_Content_Size`
    /// field is 1 byte long; otherwise `FCS_Field_Size` is 0 and the
    /// `Frame_Content_Size` is not provided.
    ///
    /// | Flag Value (decimal) | Size of the `Frame_Content_Size` field in bytes |
    /// | -- | -- |
    /// | 0 | 0 or 1 (see above) |
    /// | 1 | 2 |
    /// | 2 | 4 |
    /// | 3 | 8 |
    pub fn frame_content_size_flag(&self) -> u8 {
        (self.0 & 0b1100_0000) >> 6
    }

    /// The reserved bit (bit 3). It is set aside for some future feature; a
    /// compliant decoder **must ensure** that this value is zero.
    #[expect(dead_code)]
    pub fn reserved_flag(&self) -> bool {
        (self.0 & 0b0000_1000) != 0
    }

    /// The `Single_Segment_flag` (bit 5). When set, data must be regenerated
    /// within a single continuous memory segment.
    ///
    /// In this case the `Window_Descriptor` byte is skipped, but
    /// `Frame_Content_Size` is present. The decoder must allocate a memory
    /// segment equal to or larger than `Frame_Content_Size`.
    pub fn single_segment_flag(&self) -> bool {
        (self.0 & 0b0010_0000) != 0
    }

    /// The `Content_Checksum_flag` (bit 2). When set, a 32-bit
    /// `Content_Checksum` will be present at the end of the frame.
    pub fn content_checksum_flag(&self) -> bool {
        (self.0 & 0b0000_0100) != 0
    }

    /// The `Dictionary_ID_flag`, stored in bits 1-0. It tells whether a
    /// dictionary ID is provided within the header and how wide that field is.
    ///
    /// | Value (Decimal) | `DID_Field_Size` (bytes) |
    /// | -- | -- |
    /// | 0 | 0 |
    /// | 1 | 1 |
    /// | 2 | 2 |
    /// | 3 | 4 |
    pub fn dict_id_flag(&self) -> u8 {
        self.0 & 0b0000_0011
    }

    /// Width in bytes of the `Frame_Content_Size` field announced by this
    /// descriptor.
    ///
    /// A width of zero means the `Frame_Content_Size` field is not present
    /// within the header.
    pub fn frame_content_size_bytes(&self) -> Result<u8, FrameDescriptorError> {
        match (self.frame_content_size_flag(), self.single_segment_flag()) {
            // Flag 0 is the only value whose width depends on the
            // single-segment flag.
            (0, false) => Ok(0),
            (0, true) => Ok(1),
            (1, _) => Ok(2),
            (2, _) => Ok(4),
            (3, _) => Ok(8),
            // The flag is two bits wide, so this arm only exists to keep the
            // match exhaustive.
            (other, _) => Err(FrameDescriptorError::InvalidFrameContentSizeFlag { got: other }),
        }
    }

    /// Width in bytes of the `Dictionary_ID` field announced by this
    /// descriptor.
    ///
    /// A width of zero means the dictionary ID is not present within the
    /// header, and "It's up to the decoder to know which dictionary to use."
    pub fn dictionary_id_bytes(&self) -> Result<u8, FrameDescriptorError> {
        match self.dict_id_flag() {
            // For 0, 1 and 2 the flag value is the field width itself.
            width @ 0..=2 => Ok(width),
            3 => Ok(4),
            // The flag is masked to two bits, so this arm only exists to keep
            // the match exhaustive. Note that it reports the content-size
            // variant of the error.
            other => Err(FrameDescriptorError::InvalidFrameContentSizeFlag { got: other }),
        }
    }
}