structured_zstd/decoding/
dictionary.rs1use alloc::vec::Vec;
2use core::convert::TryInto;
3
4use crate::decoding::errors::DictionaryDecodeError;
5use crate::decoding::scratch::FSEScratch;
6use crate::decoding::scratch::HuffmanScratch;
7
8pub struct Dictionary {
13 pub id: u32,
16 pub fse: FSEScratch,
19 pub huf: HuffmanScratch,
22 pub dict_content: Vec<u8>,
32 pub offset_hist: [u32; 3],
37}
38
/// Four-byte prefix that [`Dictionary::decode_dict`] requires at the start of a
/// serialized dictionary.
///
/// This is `0xEC30A437` in little-endian byte order, i.e. the bytes
/// `37 A4 30 EC`.
pub const MAGIC_NUM: [u8; 4] = 0xEC30_A437u32.to_le_bytes();
41
42impl Dictionary {
43 pub fn from_raw_content(
48 id: u32,
49 dict_content: Vec<u8>,
50 ) -> Result<Dictionary, DictionaryDecodeError> {
51 if id == 0 {
52 return Err(DictionaryDecodeError::ZeroDictionaryId);
53 }
54 if dict_content.is_empty() {
55 return Err(DictionaryDecodeError::DictionaryTooSmall { got: 0, need: 1 });
56 }
57
58 Ok(Dictionary {
59 id,
60 fse: FSEScratch::new(),
61 huf: HuffmanScratch::new(),
62 dict_content,
63 offset_hist: [1, 4, 8],
64 })
65 }
66
67 pub fn decode_dict(raw: &[u8]) -> Result<Dictionary, DictionaryDecodeError> {
71 const MIN_MAGIC_AND_ID_LEN: usize = 8;
72 const OFFSET_HISTORY_LEN: usize = 12;
73
74 if raw.len() < MIN_MAGIC_AND_ID_LEN {
75 return Err(DictionaryDecodeError::DictionaryTooSmall {
76 got: raw.len(),
77 need: MIN_MAGIC_AND_ID_LEN,
78 });
79 }
80
81 let mut new_dict = Dictionary {
82 id: 0,
83 fse: FSEScratch::new(),
84 huf: HuffmanScratch::new(),
85 dict_content: Vec::new(),
86 offset_hist: [1, 4, 8],
87 };
88
89 let magic_num: [u8; 4] = raw[..4].try_into().expect("optimized away");
90 if magic_num != MAGIC_NUM {
91 return Err(DictionaryDecodeError::BadMagicNum { got: magic_num });
92 }
93
94 let dict_id = raw[4..8].try_into().expect("optimized away");
95 let dict_id = u32::from_le_bytes(dict_id);
96 if dict_id == 0 {
97 return Err(DictionaryDecodeError::ZeroDictionaryId);
98 }
99 new_dict.id = dict_id;
100
101 let raw_tables = &raw[8..];
102
103 let huf_size = new_dict.huf.table.build_decoder(raw_tables)?;
104 let raw_tables = &raw_tables[huf_size as usize..];
105
106 let of_size = new_dict.fse.offsets.build_decoder(
107 raw_tables,
108 crate::decoding::sequence_section_decoder::OF_MAX_LOG,
109 )?;
110 let raw_tables = &raw_tables[of_size..];
111
112 let ml_size = new_dict.fse.match_lengths.build_decoder(
113 raw_tables,
114 crate::decoding::sequence_section_decoder::ML_MAX_LOG,
115 )?;
116 let raw_tables = &raw_tables[ml_size..];
117
118 let ll_size = new_dict.fse.literal_lengths.build_decoder(
119 raw_tables,
120 crate::decoding::sequence_section_decoder::LL_MAX_LOG,
121 )?;
122 let raw_tables = &raw_tables[ll_size..];
123
124 if raw_tables.len() < OFFSET_HISTORY_LEN {
125 return Err(DictionaryDecodeError::DictionaryTooSmall {
126 got: raw_tables.len(),
127 need: OFFSET_HISTORY_LEN,
128 });
129 }
130
131 let offset1 = raw_tables[0..4].try_into().expect("optimized away");
132 let offset1 = u32::from_le_bytes(offset1);
133
134 let offset2 = raw_tables[4..8].try_into().expect("optimized away");
135 let offset2 = u32::from_le_bytes(offset2);
136
137 let offset3 = raw_tables[8..12].try_into().expect("optimized away");
138 let offset3 = u32::from_le_bytes(offset3);
139
140 if offset1 == 0 {
141 return Err(DictionaryDecodeError::ZeroRepeatOffsetInDictionary { index: 0 });
142 }
143 if offset2 == 0 {
144 return Err(DictionaryDecodeError::ZeroRepeatOffsetInDictionary { index: 1 });
145 }
146 if offset3 == 0 {
147 return Err(DictionaryDecodeError::ZeroRepeatOffsetInDictionary { index: 2 });
148 }
149
150 new_dict.offset_hist[0] = offset1;
151 new_dict.offset_hist[1] = offset2;
152 new_dict.offset_hist[2] = offset3;
153
154 let raw_content = &raw_tables[12..];
155 new_dict.dict_content.extend(raw_content);
156
157 Ok(new_dict)
158 }
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the byte index at which the repeat-offset history begins in `raw`.
    ///
    /// Skips the eight header bytes, then decodes the Huffman table followed by
    /// the offset, match-length and literal-length FSE tables, adding up the
    /// size each decoder reports. Panics if any table fails to decode.
    fn offset_history_start(raw: &[u8]) -> usize {
        let mut huf = crate::decoding::scratch::HuffmanScratch::new();
        let mut fse = crate::decoding::scratch::FSEScratch::new();

        // Magic number and dictionary id occupy the first eight bytes.
        let huffman_start = 8usize;
        let huffman_len = huf
            .table
            .build_decoder(&raw[huffman_start..])
            .expect("reference dictionary huffman table should decode");

        let of_start = huffman_start + huffman_len as usize;
        let of_len = fse
            .offsets
            .build_decoder(
                &raw[of_start..],
                crate::decoding::sequence_section_decoder::OF_MAX_LOG,
            )
            .expect("reference dictionary OF table should decode");

        let ml_start = of_start + of_len;
        let ml_len = fse
            .match_lengths
            .build_decoder(
                &raw[ml_start..],
                crate::decoding::sequence_section_decoder::ML_MAX_LOG,
            )
            .expect("reference dictionary ML table should decode");

        let ll_start = ml_start + ml_len;
        let ll_len = fse
            .literal_lengths
            .build_decoder(
                &raw[ll_start..],
                crate::decoding::sequence_section_decoder::LL_MAX_LOG,
            )
            .expect("reference dictionary LL table should decode");

        ll_start + ll_len
    }

    /// An empty buffer is rejected before the magic number and id are read.
    #[test]
    fn decode_dict_rejects_short_buffer_before_magic_and_id() {
        let err = Dictionary::decode_dict(&[])
            .err()
            .unwrap_or_else(|| panic!("expected short dictionary to fail"));

        assert!(matches!(
            err,
            DictionaryDecodeError::DictionaryTooSmall { got: 0, need: 8 }
        ));
    }

    /// A valid header followed by seven zero bytes must yield an error, not a panic.
    #[test]
    fn decode_dict_malformed_input_returns_error_instead_of_panicking() {
        let raw: Vec<u8> = [&MAGIC_NUM[..], &1u32.to_le_bytes()[..], &[0u8; 7][..]].concat();

        let outcome = std::panic::catch_unwind(|| Dictionary::decode_dict(&raw));
        assert!(
            outcome.is_ok(),
            "decode_dict must not panic on malformed input"
        );

        let decoded = outcome.unwrap();
        assert!(decoded.is_err(), "malformed dictionary must return error");
    }

    /// Zeroing the first repeat offset of the reference dictionary is reported
    /// with index 0.
    #[test]
    fn decode_dict_rejects_zero_repeat_offsets() {
        let mut raw = include_bytes!("../../dict_tests/dictionary").to_vec();
        let first_offset = offset_history_start(&raw);

        // Overwrite the first little-endian u32 of the offset history with zero.
        for byte in &mut raw[first_offset..first_offset + 4] {
            *byte = 0;
        }

        assert!(matches!(
            Dictionary::decode_dict(&raw),
            Err(DictionaryDecodeError::ZeroRepeatOffsetInDictionary { index: 0 })
        ));
    }

    /// Raw-content dictionaries must contain at least one byte.
    #[test]
    fn from_raw_content_rejects_empty_dictionary_content() {
        let outcome = Dictionary::from_raw_content(1, Vec::new());

        assert!(matches!(
            outcome,
            Err(DictionaryDecodeError::DictionaryTooSmall { got: 0, need: 1 })
        ));
    }
}