structured_zstd/decoding/
dictionary.rs1#[cfg(not(target_has_atomic = "ptr"))]
2use alloc::rc::Rc;
3#[cfg(target_has_atomic = "ptr")]
4use alloc::sync::Arc;
5use alloc::vec::Vec;
6use core::convert::TryInto;
7
8use crate::decoding::errors::DictionaryDecodeError;
9use crate::decoding::scratch::FSEScratch;
10use crate::decoding::scratch::HuffmanScratch;
11
12pub struct Dictionary {
17 pub id: u32,
20 pub fse: FSEScratch,
23 pub huf: HuffmanScratch,
26 pub dict_content: Vec<u8>,
36 pub offset_hist: [u32; 3],
41}
42
43#[cfg(target_has_atomic = "ptr")]
44type SharedDictionary = Arc<Dictionary>;
45#[cfg(not(target_has_atomic = "ptr"))]
46type SharedDictionary = Rc<Dictionary>;
47
48#[derive(Clone)]
52pub struct DictionaryHandle {
53 inner: SharedDictionary,
54}
55
/// Magic number that opens every serialized dictionary: the little-endian byte encoding of
/// `0xEC30A437`, i.e. `[0x37, 0xA4, 0x30, 0xEC]`.
pub const MAGIC_NUM: [u8; 4] = 0xEC30_A437_u32.to_le_bytes();
58
59impl Dictionary {
60 pub fn from_raw_content(
65 id: u32,
66 dict_content: Vec<u8>,
67 ) -> Result<Dictionary, DictionaryDecodeError> {
68 if id == 0 {
69 return Err(DictionaryDecodeError::ZeroDictionaryId);
70 }
71 if dict_content.is_empty() {
72 return Err(DictionaryDecodeError::DictionaryTooSmall { got: 0, need: 1 });
73 }
74
75 Ok(Dictionary {
76 id,
77 fse: FSEScratch::new(),
78 huf: HuffmanScratch::new(),
79 dict_content,
80 offset_hist: [1, 4, 8],
81 })
82 }
83
84 pub fn decode_dict(raw: &[u8]) -> Result<Dictionary, DictionaryDecodeError> {
88 const MIN_MAGIC_AND_ID_LEN: usize = 8;
89 const OFFSET_HISTORY_LEN: usize = 12;
90
91 if raw.len() < MIN_MAGIC_AND_ID_LEN {
92 return Err(DictionaryDecodeError::DictionaryTooSmall {
93 got: raw.len(),
94 need: MIN_MAGIC_AND_ID_LEN,
95 });
96 }
97
98 let mut new_dict = Dictionary {
99 id: 0,
100 fse: FSEScratch::new(),
101 huf: HuffmanScratch::new(),
102 dict_content: Vec::new(),
103 offset_hist: [1, 4, 8],
104 };
105
106 let magic_num: [u8; 4] = raw[..4].try_into().expect("optimized away");
107 if magic_num != MAGIC_NUM {
108 return Err(DictionaryDecodeError::BadMagicNum { got: magic_num });
109 }
110
111 let dict_id = raw[4..8].try_into().expect("optimized away");
112 let dict_id = u32::from_le_bytes(dict_id);
113 if dict_id == 0 {
114 return Err(DictionaryDecodeError::ZeroDictionaryId);
115 }
116 new_dict.id = dict_id;
117
118 let raw_tables = &raw[8..];
119
120 let huf_size = new_dict.huf.table.build_decoder(raw_tables)?;
121 let raw_tables = &raw_tables[huf_size as usize..];
122
123 let of_size = new_dict.fse.offsets.build_decoder(
124 raw_tables,
125 crate::decoding::sequence_section_decoder::OF_MAX_LOG,
126 )?;
127 new_dict.fse.offsets.enrich_for_offsets();
128 let raw_tables = &raw_tables[of_size..];
129
130 let ml_size = new_dict.fse.match_lengths.build_decoder(
131 raw_tables,
132 crate::decoding::sequence_section_decoder::ML_MAX_LOG,
133 )?;
134 new_dict
135 .fse
136 .match_lengths
137 .enrich_with_packed_seq_meta(&crate::decoding::sequence_section_decoder::ML_META);
138 let raw_tables = &raw_tables[ml_size..];
139
140 let ll_size = new_dict.fse.literal_lengths.build_decoder(
141 raw_tables,
142 crate::decoding::sequence_section_decoder::LL_MAX_LOG,
143 )?;
144 new_dict
145 .fse
146 .literal_lengths
147 .enrich_with_packed_seq_meta(&crate::decoding::sequence_section_decoder::LL_META);
148 let raw_tables = &raw_tables[ll_size..];
149
150 if raw_tables.len() < OFFSET_HISTORY_LEN {
151 return Err(DictionaryDecodeError::DictionaryTooSmall {
152 got: raw_tables.len(),
153 need: OFFSET_HISTORY_LEN,
154 });
155 }
156
157 let offset1 = raw_tables[0..4].try_into().expect("optimized away");
158 let offset1 = u32::from_le_bytes(offset1);
159
160 let offset2 = raw_tables[4..8].try_into().expect("optimized away");
161 let offset2 = u32::from_le_bytes(offset2);
162
163 let offset3 = raw_tables[8..12].try_into().expect("optimized away");
164 let offset3 = u32::from_le_bytes(offset3);
165
166 if offset1 == 0 {
167 return Err(DictionaryDecodeError::ZeroRepeatOffsetInDictionary { index: 0 });
168 }
169 if offset2 == 0 {
170 return Err(DictionaryDecodeError::ZeroRepeatOffsetInDictionary { index: 1 });
171 }
172 if offset3 == 0 {
173 return Err(DictionaryDecodeError::ZeroRepeatOffsetInDictionary { index: 2 });
174 }
175
176 new_dict.offset_hist[0] = offset1;
177 new_dict.offset_hist[1] = offset2;
178 new_dict.offset_hist[2] = offset3;
179
180 let raw_content = &raw_tables[12..];
181 new_dict.dict_content.extend(raw_content);
182
183 Ok(new_dict)
184 }
185
186 pub fn into_handle(self) -> DictionaryHandle {
188 DictionaryHandle::from_dictionary(self)
189 }
190}
191
192impl DictionaryHandle {
193 pub fn from_dictionary(dict: Dictionary) -> Self {
195 Self {
196 inner: SharedDictionary::new(dict),
197 }
198 }
199
200 pub fn decode_dict(raw: &[u8]) -> Result<Self, DictionaryDecodeError> {
202 Dictionary::decode_dict(raw).map(Self::from_dictionary)
203 }
204
205 pub fn id(&self) -> u32 {
206 self.inner.id
207 }
208
209 pub fn as_dict(&self) -> &Dictionary {
210 &self.inner
211 }
212}
213
214impl AsRef<Dictionary> for DictionaryHandle {
215 fn as_ref(&self) -> &Dictionary {
216 self.as_dict()
217 }
218}
219
220impl From<Dictionary> for DictionaryHandle {
221 fn from(dict: Dictionary) -> Self {
222 DictionaryHandle::from_dictionary(dict)
223 }
224}
225
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// Returns the index in `raw` at which the repeat-offset history starts.
    ///
    /// Decodes the Huffman table and the three FSE tables that follow the 8-byte header and
    /// adds up the byte counts each decoder reports.
    fn offset_history_start(raw: &[u8]) -> usize {
        use crate::decoding::sequence_section_decoder::{LL_MAX_LOG, ML_MAX_LOG, OF_MAX_LOG};

        let mut huf = crate::decoding::scratch::HuffmanScratch::new();
        let mut fse = crate::decoding::scratch::FSEScratch::new();

        // Skip the magic number and dictionary ID.
        let huf_start = 8usize;
        let huf_len = huf
            .table
            .build_decoder(&raw[huf_start..])
            .expect("reference dictionary huffman table should decode") as usize;

        let of_start = huf_start + huf_len;
        let of_len = fse
            .offsets
            .build_decoder(&raw[of_start..], OF_MAX_LOG)
            .expect("reference dictionary OF table should decode");

        let ml_start = of_start + of_len;
        let ml_len = fse
            .match_lengths
            .build_decoder(&raw[ml_start..], ML_MAX_LOG)
            .expect("reference dictionary ML table should decode");

        let ll_start = ml_start + ml_len;
        let ll_len = fse
            .literal_lengths
            .build_decoder(&raw[ll_start..], LL_MAX_LOG)
            .expect("reference dictionary LL table should decode");

        ll_start + ll_len
    }

    /// An empty buffer cannot hold the 8-byte magic + ID header.
    #[test]
    fn decode_dict_rejects_short_buffer_before_magic_and_id() {
        match Dictionary::decode_dict(&[]) {
            Err(err) => assert!(matches!(
                err,
                DictionaryDecodeError::DictionaryTooSmall { got: 0, need: 8 }
            )),
            Ok(_) => panic!("expected short dictionary to fail"),
        }
    }

    /// A valid header followed by seven zero bytes must yield an error, not a panic.
    #[test]
    fn decode_dict_malformed_input_returns_error_instead_of_panicking() {
        let raw: Vec<u8> = MAGIC_NUM
            .iter()
            .copied()
            .chain(1u32.to_le_bytes())
            .chain([0u8; 7])
            .collect();

        let outcome = std::panic::catch_unwind(|| Dictionary::decode_dict(&raw));
        assert!(
            outcome.is_ok(),
            "decode_dict must not panic on malformed input"
        );
        assert!(
            outcome.unwrap().is_err(),
            "malformed dictionary must return error"
        );
    }

    /// Zeroing the first repeat offset of the reference dictionary is reported as index 0.
    #[test]
    fn decode_dict_rejects_zero_repeat_offsets() {
        let mut raw = include_bytes!("../../dict_tests/dictionary").to_vec();
        let start = offset_history_start(&raw);

        // Overwrite the first little-endian u32 repeat offset with zero.
        raw[start..start + 4].fill(0);

        assert!(matches!(
            Dictionary::decode_dict(&raw),
            Err(DictionaryDecodeError::ZeroRepeatOffsetInDictionary { index: 0 })
        ));
    }

    /// Raw-content dictionaries must carry at least one byte of content.
    #[test]
    fn from_raw_content_rejects_empty_dictionary_content() {
        let outcome = Dictionary::from_raw_content(1, Vec::new());
        assert!(matches!(
            outcome,
            Err(DictionaryDecodeError::DictionaryTooSmall { got: 0, need: 1 })
        ));
    }

    /// A handle built from a raw-content dictionary exposes it through `AsRef`.
    #[test]
    fn dictionary_handle_from_raw_content_supports_as_ref() {
        let handle = Dictionary::from_raw_content(7, vec![42])
            .expect("raw dict should build")
            .into_handle();
        let borrowed: &Dictionary = handle.as_ref();

        assert_eq!(borrowed.id, 7);
        assert_eq!(borrowed.dict_content.as_slice(), &[42]);
    }

    /// Cloning a handle shares the same inner allocation.
    #[test]
    fn dictionary_handle_clones_share_inner() {
        let raw = include_bytes!("../../dict_tests/dictionary");
        let original = DictionaryHandle::decode_dict(raw).expect("dictionary should parse");
        let twin = original.clone();

        assert_eq!(original.id(), twin.id());
        assert!(SharedDictionary::ptr_eq(&original.inner, &twin.inner));
    }
}