1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
use super::*;
use test_log::test;
#[test]
fn compression_serialize_none() {
    // `CompressionType::None` must encode into a buffer of length 1.
    let encoded_len = CompressionType::None.encode_into_vec().len();
    assert_eq!(1, encoded_len);
}
// Tests in this module are only compiled when the `lz4` feature is enabled.
#[cfg(feature = "lz4")]
mod lz4 {
    use super::*;
    use test_log::test;

    #[test]
    fn compression_serialize_lz4() {
        // `CompressionType::Lz4` must encode into a buffer of length 1.
        let encoded_len = CompressionType::Lz4.encode_into_vec().len();
        assert_eq!(1, encoded_len);
    }
}
#[cfg(zstd_any)]
mod zstd {
use super::*;
use test_log::test;
#[test]
fn compression_serialize_zstd() {
    // `CompressionType::Zstd(3)` must encode into a buffer of length 2.
    let encoded_len = CompressionType::Zstd(3).encode_into_vec().len();
    assert_eq!(2, encoded_len);
}
#[test]
fn compression_roundtrip_zstd() {
    // Encoding and then decoding must reproduce the original value for
    // every sampled level.
    let sample_levels = [1, 3, 9, 19];
    for level in sample_levels {
        let before = CompressionType::Zstd(level);
        let bytes = before.encode_into_vec();
        // `decode_from` is handed a mutable reference to a byte slice.
        let mut reader = &bytes[..];
        let after = CompressionType::decode_from(&mut reader).expect("decode failed");
        assert_eq!(before, after);
    }
}
#[test]
fn compression_display_zstd() {
    // The `Display` rendering of `Zstd(3)` must be the string "zstd".
    let rendered = CompressionType::Zstd(3).to_string();
    assert_eq!(rendered, "zstd");
}
#[test]
fn compression_zstd_rejects_invalid_level() {
    // `CompressionType::zstd` must return an error for each of these levels.
    let rejected_levels = [0, 23, -1, 200];
    for invalid_level in rejected_levels {
        assert!(
            CompressionType::zstd(invalid_level).is_err(),
            "level {invalid_level} should be rejected"
        );
    }
}
#[test]
fn compression_zstd_decode_rejects_invalid_level() {
    // Borrow the tag byte from a genuine two-byte `Zstd(3)` encoding, then
    // pair it with level bytes that lie outside the range 1..=22.
    let valid = CompressionType::Zstd(3).encode_into_vec();
    assert_eq!(valid.len(), 2);
    let tag = valid[0];

    // (corrupted level byte, message reported if decoding wrongly succeeds)
    let cases = [
        (0, "level 0 should be rejected on decode"),
        (23, "level 23 should be rejected on decode"),
    ];
    for (bad_level, message) in cases {
        let corrupted = [tag, bad_level];
        let mut reader = &corrupted[..];
        let result = CompressionType::decode_from(&mut reader);
        assert!(result.is_err(), "{message}");
    }
}
#[test]
fn compression_serialize_zstd_dict() {
    let variant = CompressionType::ZstdDict {
        level: 3,
        dict_id: 0xDEAD_BEEF,
    };
    let encoded = variant.encode_into_vec();
    // Expected layout: tag 4, then level 3 as an i8, then dict_id
    // 0xDEAD_BEEF in little-endian byte order.
    assert_eq!(encoded, [4, 3, 0xEF, 0xBE, 0xAD, 0xDE]);
}
#[test]
fn compression_roundtrip_zstd_dict() {
    // Every (level, dict_id) combination must survive an encode/decode
    // round trip unchanged.
    let sample_levels = [1, 3, 9, 19];
    let sample_dict_ids = [0, 1, 0xDEAD_BEEF, u32::MAX];
    for level in sample_levels {
        for dict_id in sample_dict_ids {
            let before = CompressionType::ZstdDict {
                level: level,
                dict_id: dict_id,
            };
            let bytes = before.encode_into_vec();
            let mut reader = &bytes[..];
            let after = CompressionType::decode_from(&mut reader).expect("decode failed");
            assert_eq!(before, after);
        }
    }
}
#[test]
fn compression_display_zstd_dict() {
    // The `Display` rendering of a `ZstdDict` value must be "zstd+dict".
    let variant = CompressionType::ZstdDict {
        level: 3,
        dict_id: 42,
    };
    assert_eq!(variant.to_string(), "zstd+dict");
}
#[test]
fn compression_zstd_dict_rejects_invalid_level() {
    // `CompressionType::zstd_dict` must return an error for each of these
    // levels; the dictionary id (42) is the same in every call.
    let rejected_levels = [0, 23, -1, 200];
    for invalid_level in rejected_levels {
        assert!(
            CompressionType::zstd_dict(invalid_level, 42).is_err(),
            "level {invalid_level} should be rejected"
        );
    }
}
#[test]
fn compression_zstd_dict_decode_rejects_invalid_level() {
    // Start from a genuine `ZstdDict` encoding so that the level byte is the
    // only thing wrong with the buffer handed to `decode_from`.
    let valid = CompressionType::ZstdDict {
        level: 3,
        dict_id: 42,
    }
    .encode_into_vec();
    assert_eq!(valid[0], 4); // tag

    // Probe both sides of the valid range 1..=22 (0 is just below, 23 is
    // just above), matching the coverage of the plain `Zstd` decode test.
    for invalid_level in [0, 23] {
        let mut buf = valid.clone();
        buf[1] = invalid_level; // corrupt the level byte
        let result = CompressionType::decode_from(&mut &buf[..]);
        assert!(
            result.is_err(),
            "level {invalid_level} should be rejected on decode"
        );
    }
}
#[test]
fn zstd_dictionary_id_deterministic() {
    // Two dictionaries built from identical bytes must report the same id.
    let content = b"sample dictionary content for testing";
    let first = ZstdDictionary::new(content);
    let second = ZstdDictionary::new(content);
    let (first_id, second_id) = (first.id(), second.id());
    assert_eq!(first_id, second_id);
}
#[test]
fn zstd_dictionary_different_content_different_id() {
    // Dictionaries built from these two different byte strings must not
    // report the same id.
    let one = ZstdDictionary::new(b"dictionary one");
    let two = ZstdDictionary::new(b"dictionary two");
    let (id_one, id_two) = (one.id(), two.id());
    assert_ne!(id_one, id_two);
}
#[test]
fn zstd_dictionary_raw_roundtrip() {
    // `raw()` must hand back exactly the bytes that were passed to `new`.
    let source_bytes = b"my dictionary bytes";
    let stored = ZstdDictionary::new(source_bytes);
    assert_eq!(stored.raw(), source_bytes);
}
#[test]
fn zstd_dictionary_debug_format() {
    // The `Debug` rendering must contain the type name and the text
    // "size: 4" (the input used here is 4 bytes long).
    let rendered = format!("{:?}", ZstdDictionary::new(b"test"));
    assert!(rendered.contains("ZstdDictionary"));
    assert!(rendered.contains("size: 4"));
}
// --- prepared_handle: pre-parsed `DictionaryHandle` cache ---
//
// The whole point of #232: parse the dictionary ONCE per
// `ZstdDictionary` instance and reuse the Arc-backed handle on every
// subsequent decompress call, across all threads. The tests below
// pin the contract: success / memoization / shared-OnceCell-across-
// clones / both finalized + raw-content paths / error surfacing.
#[cfg(feature = "zstd")]
#[test]
fn prepared_handle_raw_content_dict_parses_and_memoizes() {
    // These bytes carry no magic prefix, so this exercises the raw-content
    // path, where structured-zstd builds a `Dictionary` by treating the
    // bytes as LZ77 history. The first `prepared_handle` call parses; the
    // second must be served from the OnceCell cache. The check made here is
    // that both handles report the same dict id.
    let dict = ZstdDictionary::new(b"raw-content training bytes here");

    let parsed = dict
        .prepared_handle()
        .expect("first call must parse raw-content dict");
    let cached = dict
        .prepared_handle()
        .expect("second call must hit the cache");

    let (parsed_id, cached_id) = (parsed.id(), cached.id());
    assert_eq!(
        parsed_id, cached_id,
        "cached handle must report the same dict id"
    );
}
#[cfg(feature = "zstd")]
#[test]
fn prepared_handle_rejects_corrupted_finalized_magic() {
    // Input that merely LOOKS like a finalized dict (the magic prefix
    // matches, the rest is malformed) must come back from `prepared_handle`
    // as a parse error instead of a panic. On that failure the OnceCell has
    // to stay empty: if anything were stored, a later caller would skip the
    // (now-deterministically-failing) parse and silently fall back to a
    // stale cached value, which breaks the retry-on-failure contract.
    let magic = [0x37, 0xA4, 0x30, 0xEC]; // valid magic
    let mut bad = magic.to_vec();
    bad.resize(magic.len() + 16, 0xFF); // 16 bytes of garbage payload

    let dict = ZstdDictionary::new(&bad);
    let outcome = dict.prepared_handle();
    assert!(
        outcome.is_err(),
        "corrupted finalized dict must surface parse error",
    );

    let cached = dict.prepared.get();
    assert!(
        cached.is_none(),
        "failed parse must NOT populate the OnceCell — retry-on-failure contract",
    );
}
#[cfg(feature = "zstd")]
#[test]
fn prepared_handle_shared_across_clones() {
    // `ZstdDictionary::clone` shares the inner `Arc<OnceCell<…>>`.
    // Parsing through one clone must be visible to the other —
    // otherwise each clone would re-parse independently, defeating
    // the purpose of the cache when dictionaries are distributed
    // across threads via clones.
    let dict_a = ZstdDictionary::new(b"shared dict bytes for clone test");
    let dict_b = dict_a.clone();
    let _ = dict_a
        .prepared_handle()
        .expect("parse via dict_a must succeed");
    // After dict_a parsed, dict_b's OnceCell (same Arc) must be
    // populated. We cannot directly observe "did not re-parse"
    // without instrumentation, but we can assert the cached
    // handle round-trips through dict_b and reports the same id.
    let h_b = dict_b
        .prepared_handle()
        .expect("dict_b must see cached handle");
    assert_eq!(h_b.id(), dict_a.id());
    // Cross-check OnceCell state directly: it must be .get()-readable
    // from BOTH clones. dict_a is the clone that triggered the parse,
    // dict_b is the one that only observed it.
    assert!(
        dict_a.prepared.get().is_some(),
        "OnceCell must be populated on dict_a after dict_a parsed",
    );
    assert!(
        dict_b.prepared.get().is_some(),
        "OnceCell must be populated on dict_b after dict_a parsed",
    );
}
#[cfg(feature = "zstd")]
#[test]
fn prepared_handle_is_lazy_and_populated_after_first_call() {
    // Lazy-init contract, checked from both directions:
    //   1. `ZstdDictionary::new` must NOT parse eagerly, so the OnceCell
    //      starts out empty;
    //   2. the first `prepared_handle` call must fill it.
    // A regression in either direction (eager parse OR no caching) trips
    // one of the asserts below.
    //
    // The end-to-end "real finalized dict parses successfully" path is left
    // to the existing `zstd_backend` round-trip suite, which feeds real
    // compressed frames through `decompress_with_dict` and so implicitly
    // goes through `prepared_handle`. Duplicating the dict-builder here
    // would require linking the zstd dict trainer and would add no coverage
    // beyond what those backend tests already give.
    let dict = ZstdDictionary::new(b"laziness test bytes");

    let populated_before = dict.prepared.get().is_some();
    assert!(
        !populated_before,
        "ZstdDictionary::new must NOT eagerly parse the dictionary",
    );

    let _ = dict.prepared_handle().expect("explicit parse must succeed");

    let empty_after = dict.prepared.get().is_none();
    assert!(
        !empty_after,
        "OnceCell must be populated after first prepared_handle call",
    );
}
}