1pub mod alp;
23pub mod alp_rd;
24pub mod codec_types;
25pub mod crdt_compress;
26pub mod delta;
27pub mod detect;
28pub mod double_delta;
29pub mod error;
30pub mod fastlanes;
31pub mod fsst;
32pub mod gorilla;
33pub mod lz4;
34pub mod pcodec;
35pub mod pipeline;
36pub mod rans;
37pub mod raw;
38pub mod spherical;
39pub mod vector_quant;
40pub mod zstd_codec;
41
/// Shared sample-size constant (1024).
///
/// NOTE(review): presumably the number of values sampled when a codec is
/// auto-detected — the use site is not visible from this file, confirm
/// against the `detect` module.
pub const CODEC_SAMPLE_SIZE: usize = 1024;
45
46pub use codec_types::{
47 ColumnCodec, ColumnStatistics, ColumnTypeHint, ResolvedColumnCodec, parse_codec_name,
48};
49pub use crdt_compress::CrdtOp;
50pub use delta::{DeltaDecoder, DeltaEncoder};
51pub use detect::detect_codec;
52pub use double_delta::{DoubleDeltaDecoder, DoubleDeltaEncoder};
53pub use error::CodecError;
54pub use gorilla::{GorillaDecoder, GorillaEncoder};
55pub use lz4::{Lz4Decoder, Lz4Encoder};
56pub use pipeline::{
57 decode_bytes_pipeline, decode_f64_pipeline, decode_i64_pipeline, encode_bytes_pipeline,
58 encode_f64_pipeline, encode_i64_pipeline,
59};
60pub use raw::{RawDecoder, RawEncoder};
61pub use zstd_codec::{ZstdDecoder, ZstdEncoder};
62
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `ColumnCodec` variant covered by this suite, paired with the
    /// exact string that `as_str()` must return and `parse_codec_name` must
    /// accept.
    ///
    /// The naming, parsing and serde tests all iterate this single table so
    /// the variant list cannot drift between tests.
    const CANONICAL_NAMES: &[(ColumnCodec, &str)] = &[
        (ColumnCodec::Auto, "auto"),
        (ColumnCodec::AlpFastLanesLz4, "alp_fastlanes_lz4"),
        (ColumnCodec::AlpRdLz4, "alp_rd_lz4"),
        (ColumnCodec::PcodecLz4, "pcodec_lz4"),
        (ColumnCodec::DeltaFastLanesLz4, "delta_fastlanes_lz4"),
        (ColumnCodec::FastLanesLz4, "fastlanes_lz4"),
        (ColumnCodec::FsstLz4, "fsst_lz4"),
        (ColumnCodec::AlpFastLanesRans, "alp_fastlanes_rans"),
        (ColumnCodec::DeltaFastLanesRans, "delta_fastlanes_rans"),
        (ColumnCodec::FsstRans, "fsst_rans"),
        (ColumnCodec::Gorilla, "gorilla"),
        (ColumnCodec::DoubleDelta, "double_delta"),
        (ColumnCodec::Delta, "delta"),
        (ColumnCodec::Lz4, "lz4"),
        (ColumnCodec::Zstd, "zstd"),
        (ColumnCodec::Raw, "raw"),
    ];

    /// Every `ResolvedColumnCodec` variant next to the `ColumnCodec` variant
    /// of the same name (`ColumnCodec::Auto` has no resolved counterpart in
    /// this table).
    const RESOLVED_PAIRS: &[(ResolvedColumnCodec, ColumnCodec)] = &[
        (
            ResolvedColumnCodec::AlpFastLanesLz4,
            ColumnCodec::AlpFastLanesLz4,
        ),
        (ResolvedColumnCodec::AlpRdLz4, ColumnCodec::AlpRdLz4),
        (ResolvedColumnCodec::PcodecLz4, ColumnCodec::PcodecLz4),
        (
            ResolvedColumnCodec::DeltaFastLanesLz4,
            ColumnCodec::DeltaFastLanesLz4,
        ),
        (ResolvedColumnCodec::FastLanesLz4, ColumnCodec::FastLanesLz4),
        (ResolvedColumnCodec::FsstLz4, ColumnCodec::FsstLz4),
        (
            ResolvedColumnCodec::AlpFastLanesRans,
            ColumnCodec::AlpFastLanesRans,
        ),
        (
            ResolvedColumnCodec::DeltaFastLanesRans,
            ColumnCodec::DeltaFastLanesRans,
        ),
        (ResolvedColumnCodec::FsstRans, ColumnCodec::FsstRans),
        (ResolvedColumnCodec::Gorilla, ColumnCodec::Gorilla),
        (ResolvedColumnCodec::DoubleDelta, ColumnCodec::DoubleDelta),
        (ResolvedColumnCodec::Delta, ColumnCodec::Delta),
        (ResolvedColumnCodec::Lz4, ColumnCodec::Lz4),
        (ResolvedColumnCodec::Zstd, ColumnCodec::Zstd),
        (ResolvedColumnCodec::Raw, ColumnCodec::Raw),
    ];

    /// Pins the string returned by `ColumnCodec::as_str()` for every variant
    /// and checks that each name consists only of ASCII lowercase letters,
    /// ASCII digits and underscores.
    #[test]
    fn canonical_codec_names_frozen() {
        for &(codec, expected) in CANONICAL_NAMES {
            assert_eq!(codec.as_str(), expected, "codec name drift: {codec:?}");
            assert!(
                expected
                    .chars()
                    .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'),
                "codec name '{expected}' is not lowercase snake_case"
            );
        }
    }

    /// For each same-named pair, the MessagePack encoding of the resolved
    /// codec must be byte-identical to that of the column codec, and
    /// `into_column_codec()` must map the resolved codec back to its pair.
    #[test]
    fn resolved_codec_discriminants_match_column_codec() {
        for &(resolved, column) in RESOLVED_PAIRS {
            let resolved_bytes = zerompk::to_msgpack_vec(&resolved).unwrap();
            let column_bytes = zerompk::to_msgpack_vec(&column).unwrap();
            assert_eq!(
                resolved_bytes, column_bytes,
                "discriminant mismatch for {resolved} vs {column}"
            );

            assert_eq!(
                resolved.into_column_codec(),
                column,
                "into_column_codec mismatch for {resolved}"
            );
        }
    }

    /// `ColumnCodec::Auto` is not a concrete codec: resolving it must fail
    /// with `CodecError::UnresolvedAuto`.
    #[test]
    fn try_resolve_auto_returns_error() {
        assert!(
            matches!(
                ColumnCodec::Auto.try_resolve(),
                Err(CodecError::UnresolvedAuto)
            ),
            "Auto.try_resolve() must return UnresolvedAuto error"
        );
    }

    /// Every concrete (non-`Auto`) codec listed in `RESOLVED_PAIRS` must
    /// resolve without error.
    #[test]
    fn try_resolve_concrete_succeeds() {
        for &(_, codec) in RESOLVED_PAIRS {
            assert!(
                codec.try_resolve().is_ok(),
                "{codec} should resolve successfully"
            );
        }
    }

    /// Each `ResolvedColumnCodec` variant survives a JSON encode/decode cycle.
    #[test]
    fn resolved_codec_serde_roundtrip() {
        for &(codec, _) in RESOLVED_PAIRS {
            let json = sonic_rs::to_string(&codec).unwrap();
            let back: ResolvedColumnCodec = sonic_rs::from_str(&json).unwrap();
            assert_eq!(codec, back, "serde roundtrip failed for {codec}");
        }
    }

    /// Each `ColumnCodec` variant (including `Auto`) survives a JSON
    /// encode/decode cycle.
    #[test]
    fn column_codec_serde_roundtrip() {
        for &(codec, _) in CANONICAL_NAMES {
            let json = sonic_rs::to_string(&codec).unwrap();
            let back: ColumnCodec = sonic_rs::from_str(&json).unwrap();
            assert_eq!(codec, back, "serde roundtrip failed for {codec}");
        }
    }

    /// Statistics built from five `i64` values report count, min, max, sum
    /// and both byte sizes.
    #[test]
    fn column_statistics_i64() {
        let values = vec![10i64, 20, 30, 40, 50];
        let stats = ColumnStatistics::from_i64(&values, ResolvedColumnCodec::Delta, 12);
        assert_eq!(stats.count, 5);
        assert_eq!(stats.min, Some(10.0));
        assert_eq!(stats.max, Some(50.0));
        assert_eq!(stats.sum, Some(150.0));
        // 40 is consistent with 5 values x 8 bytes each; 12 is the value
        // passed as the last argument above.
        assert_eq!(stats.uncompressed_bytes, 40);
        assert_eq!(stats.compressed_bytes, 12);
    }

    /// Statistics built from three `f64` values report count, min, max and sum.
    #[test]
    fn column_statistics_f64() {
        let values = vec![1.5f64, 2.5, 3.5];
        let stats = ColumnStatistics::from_f64(&values, ResolvedColumnCodec::Gorilla, 8);
        assert_eq!(stats.count, 3);
        assert_eq!(stats.min, Some(1.5));
        assert_eq!(stats.max, Some(3.5));
        assert_eq!(stats.sum, Some(7.5));
    }

    /// Statistics built from symbol ids carry the supplied cardinality and
    /// leave `min` unset.
    #[test]
    fn column_statistics_symbols() {
        let values = vec![0u32, 1, 2, 0, 1];
        let stats = ColumnStatistics::from_symbols(&values, 3, ResolvedColumnCodec::Raw, 20);
        assert_eq!(stats.count, 5);
        assert_eq!(stats.cardinality, Some(3));
        assert!(stats.min.is_none());
    }

    /// With 800 uncompressed bytes and 200 compressed bytes,
    /// `compression_ratio()` is expected to be 4.0.
    #[test]
    fn compression_ratio_calculation() {
        let stats = ColumnStatistics {
            codec: ResolvedColumnCodec::Delta,
            count: 100,
            min: None,
            max: None,
            sum: None,
            cardinality: None,
            compressed_bytes: 200,
            uncompressed_bytes: 800,
        };
        assert!((stats.compression_ratio() - 4.0).abs() < f64::EPSILON);
    }

    /// Every canonical name parses to the expected variant, and that
    /// variant's `as_str()` yields the same name again. The final length
    /// check fails when the table size changes, as a reminder to revisit
    /// `parse_codec_name`.
    #[test]
    fn parse_codec_name_all_canonical_round_trip() {
        for &(expected, name) in CANONICAL_NAMES {
            let parsed = parse_codec_name(name)
                .unwrap_or_else(|e| panic!("parse_codec_name({name:?}) failed: {e}"));
            assert_eq!(parsed, expected, "parse mismatch for {name:?}");
            assert_eq!(
                parsed.as_str(),
                name,
                "as_str() round-trip mismatch for {name:?}"
            );
        }
        assert_eq!(
            CANONICAL_NAMES.len(),
            16,
            "variant count changed — update parse_codec_name"
        );
    }

    /// Wrong case, hyphens, surrounding whitespace, the empty string and
    /// unknown or partial names must all be rejected with
    /// `CodecError::UnknownCodec`.
    #[test]
    fn parse_codec_name_rejects_non_canonical() {
        let bad: &[&str] = &[
            "LZ4",
            "Lz4",
            "GORILLA",
            "Gorilla",
            "FastLanes",
            "fast_lanes",
            "fast-lanes",
            "FSST",
            "alp-fastlanes-lz4",
            "ALP_FASTLANES_LZ4",
            "Delta_FastLanes_LZ4",
            "ZSTD",
            "RAW",
            "",
            " lz4",
            "lz4 ",
            "unknown",
            "pcodec",
        ];
        for &name in bad {
            // A single match covers both failure modes: an unexpected `Ok`
            // and an `Err` carrying the wrong variant.
            match parse_codec_name(name) {
                Ok(_) => {
                    panic!("parse_codec_name({name:?}) should have been rejected but returned Ok")
                }
                Err(err) => assert!(
                    matches!(err, CodecError::UnknownCodec { .. }),
                    "wrong error variant for {name:?}: {err}"
                ),
            }
        }
    }

    /// The error text for an unknown codec must echo the rejected name and
    /// mention at least one valid name ("lz4").
    #[test]
    fn parse_codec_name_error_message_content() {
        let err = parse_codec_name("BadCodec").unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("BadCodec"),
            "error message should contain the bad name: {msg}"
        );
        assert!(
            msg.contains("lz4"),
            "error message should list at least one valid name: {msg}"
        );
    }
}