Skip to main content

nodedb_codec/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Compression codecs for NodeDB timeseries columnar storage.
4//!
5//! Provides per-column codec selection with **cascading compression**:
6//! type-aware encoding (ALP, FastLanes, FSST, Pcodec) followed by a terminal
7//! byte compressor (lz4_flex for hot/warm, rANS for cold/S3).
8//!
9//! Cascading chains (hot/warm — lz4 terminal):
10//! - `AlpFastLanesLz4`:   f64 metrics → ALP → FastLanes → lz4
11//! - `DeltaFastLanesLz4`: i64 timestamps/counters → Delta → FastLanes → lz4
12//! - `FastLanesLz4`:      i64 raw integers → FastLanes → lz4
13//! - `FsstLz4`:           strings/logs → FSST → lz4
14//! - `PcodecLz4`:         complex numerics → Pcodec → lz4
15//! - `AlpRdLz4`:          true doubles → ALP-RD → lz4
16//!
17//! Cold/S3 tier chains (rANS terminal):
18//! - `AlpFastLanesRans`, `DeltaFastLanesRans`, `FsstRans`
19//!
20//! Shared by Origin and Lite. Compiles to WASM.
21
22pub mod alp;
23pub mod alp_rd;
24pub mod codec_types;
25pub mod crdt_compress;
26pub mod delta;
27pub mod detect;
28pub mod double_delta;
29pub mod error;
30pub mod fastlanes;
31pub mod fsst;
32pub mod gorilla;
33pub mod lz4;
34pub mod pcodec;
35pub mod pipeline;
36pub mod rans;
37pub mod raw;
38pub mod spherical;
39pub mod vector_quant;
40pub mod zstd_codec;
41
/// Number of values to sample for codec auto-detection and exponent selection.
///
/// Shared by ALP, ALP-RD, and the codec detector. The value is 2^10 = 1024.
pub const CODEC_SAMPLE_SIZE: usize = 1 << 10;
45
46pub use codec_types::{
47    ColumnCodec, ColumnStatistics, ColumnTypeHint, ResolvedColumnCodec, parse_codec_name,
48};
49pub use crdt_compress::CrdtOp;
50pub use delta::{DeltaDecoder, DeltaEncoder};
51pub use detect::detect_codec;
52pub use double_delta::{DoubleDeltaDecoder, DoubleDeltaEncoder};
53pub use error::CodecError;
54pub use gorilla::{GorillaDecoder, GorillaEncoder};
55pub use lz4::{Lz4Decoder, Lz4Encoder};
56pub use pipeline::{
57    decode_bytes_pipeline, decode_f64_pipeline, decode_i64_pipeline, encode_bytes_pipeline,
58    encode_f64_pipeline, encode_i64_pipeline,
59};
60pub use raw::{RawDecoder, RawEncoder};
61pub use zstd_codec::{ZstdDecoder, ZstdEncoder};
62
63#[cfg(test)]
64mod tests {
65    use super::*;
66
67    /// Frozen canonical codec name surface. Locks the lowercase, snake_case
68    /// forms before any user DDL exposes them. Adding a codec means appending
69    /// here and to `as_str()`; renaming any existing entry is a wire break.
70    #[test]
71    fn canonical_codec_names_frozen() {
72        let canonical: &[(ColumnCodec, &str)] = &[
73            (ColumnCodec::Auto, "auto"),
74            (ColumnCodec::AlpFastLanesLz4, "alp_fastlanes_lz4"),
75            (ColumnCodec::AlpRdLz4, "alp_rd_lz4"),
76            (ColumnCodec::PcodecLz4, "pcodec_lz4"),
77            (ColumnCodec::DeltaFastLanesLz4, "delta_fastlanes_lz4"),
78            (ColumnCodec::FastLanesLz4, "fastlanes_lz4"),
79            (ColumnCodec::FsstLz4, "fsst_lz4"),
80            (ColumnCodec::AlpFastLanesRans, "alp_fastlanes_rans"),
81            (ColumnCodec::DeltaFastLanesRans, "delta_fastlanes_rans"),
82            (ColumnCodec::FsstRans, "fsst_rans"),
83            (ColumnCodec::Gorilla, "gorilla"),
84            (ColumnCodec::DoubleDelta, "double_delta"),
85            (ColumnCodec::Delta, "delta"),
86            (ColumnCodec::Lz4, "lz4"),
87            (ColumnCodec::Zstd, "zstd"),
88            (ColumnCodec::Raw, "raw"),
89        ];
90        for (codec, expected) in canonical {
91            assert_eq!(codec.as_str(), *expected, "codec name drift: {codec:?}");
92            assert!(
93                expected
94                    .chars()
95                    .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'),
96                "codec name '{expected}' is not lowercase snake_case"
97            );
98        }
99    }
100
101    // ── ResolvedColumnCodec tests ──────────────────────────────────────────────
102
103    /// Discriminants of ResolvedColumnCodec must exactly match those of the
104    /// corresponding ColumnCodec variants so on-disk byte values are unchanged.
105    #[test]
106    fn resolved_codec_discriminants_match_column_codec() {
107        let pairs: &[(ResolvedColumnCodec, ColumnCodec)] = &[
108            (
109                ResolvedColumnCodec::AlpFastLanesLz4,
110                ColumnCodec::AlpFastLanesLz4,
111            ),
112            (ResolvedColumnCodec::AlpRdLz4, ColumnCodec::AlpRdLz4),
113            (ResolvedColumnCodec::PcodecLz4, ColumnCodec::PcodecLz4),
114            (
115                ResolvedColumnCodec::DeltaFastLanesLz4,
116                ColumnCodec::DeltaFastLanesLz4,
117            ),
118            (ResolvedColumnCodec::FastLanesLz4, ColumnCodec::FastLanesLz4),
119            (ResolvedColumnCodec::FsstLz4, ColumnCodec::FsstLz4),
120            (
121                ResolvedColumnCodec::AlpFastLanesRans,
122                ColumnCodec::AlpFastLanesRans,
123            ),
124            (
125                ResolvedColumnCodec::DeltaFastLanesRans,
126                ColumnCodec::DeltaFastLanesRans,
127            ),
128            (ResolvedColumnCodec::FsstRans, ColumnCodec::FsstRans),
129            (ResolvedColumnCodec::Gorilla, ColumnCodec::Gorilla),
130            (ResolvedColumnCodec::DoubleDelta, ColumnCodec::DoubleDelta),
131            (ResolvedColumnCodec::Delta, ColumnCodec::Delta),
132            (ResolvedColumnCodec::Lz4, ColumnCodec::Lz4),
133            (ResolvedColumnCodec::Zstd, ColumnCodec::Zstd),
134            (ResolvedColumnCodec::Raw, ColumnCodec::Raw),
135        ];
136
137        for &(resolved, column) in pairs {
138            let resolved_bytes = zerompk::to_msgpack_vec(&resolved).unwrap();
139            let column_bytes = zerompk::to_msgpack_vec(&column).unwrap();
140            assert_eq!(
141                resolved_bytes, column_bytes,
142                "discriminant mismatch for {resolved} vs {column}"
143            );
144
145            assert_eq!(
146                resolved.into_column_codec(),
147                column,
148                "into_column_codec mismatch for {resolved}"
149            );
150        }
151    }
152
153    /// Auto resolves to an error; all concrete variants resolve successfully.
154    #[test]
155    fn try_resolve_auto_returns_error() {
156        assert!(
157            matches!(
158                ColumnCodec::Auto.try_resolve(),
159                Err(crate::error::CodecError::UnresolvedAuto)
160            ),
161            "Auto.try_resolve() must return UnresolvedAuto error"
162        );
163    }
164
165    #[test]
166    fn try_resolve_concrete_succeeds() {
167        let concretes = [
168            ColumnCodec::AlpFastLanesLz4,
169            ColumnCodec::Gorilla,
170            ColumnCodec::Delta,
171            ColumnCodec::Raw,
172            ColumnCodec::Lz4,
173        ];
174        for codec in concretes {
175            assert!(
176                codec.try_resolve().is_ok(),
177                "{codec} should resolve successfully"
178            );
179        }
180    }
181
182    #[test]
183    fn resolved_codec_serde_roundtrip() {
184        for codec in [
185            ResolvedColumnCodec::AlpFastLanesLz4,
186            ResolvedColumnCodec::AlpRdLz4,
187            ResolvedColumnCodec::PcodecLz4,
188            ResolvedColumnCodec::DeltaFastLanesLz4,
189            ResolvedColumnCodec::FastLanesLz4,
190            ResolvedColumnCodec::FsstLz4,
191            ResolvedColumnCodec::AlpFastLanesRans,
192            ResolvedColumnCodec::DeltaFastLanesRans,
193            ResolvedColumnCodec::FsstRans,
194            ResolvedColumnCodec::Gorilla,
195            ResolvedColumnCodec::DoubleDelta,
196            ResolvedColumnCodec::Delta,
197            ResolvedColumnCodec::Lz4,
198            ResolvedColumnCodec::Zstd,
199            ResolvedColumnCodec::Raw,
200        ] {
201            let json = sonic_rs::to_string(&codec).unwrap();
202            let back: ResolvedColumnCodec = sonic_rs::from_str(&json).unwrap();
203            assert_eq!(codec, back, "serde roundtrip failed for {codec}");
204        }
205    }
206
207    #[test]
208    fn column_codec_serde_roundtrip() {
209        for codec in [
210            ColumnCodec::Auto,
211            ColumnCodec::AlpFastLanesLz4,
212            ColumnCodec::AlpRdLz4,
213            ColumnCodec::PcodecLz4,
214            ColumnCodec::DeltaFastLanesLz4,
215            ColumnCodec::FastLanesLz4,
216            ColumnCodec::FsstLz4,
217            ColumnCodec::AlpFastLanesRans,
218            ColumnCodec::DeltaFastLanesRans,
219            ColumnCodec::FsstRans,
220            ColumnCodec::Gorilla,
221            ColumnCodec::DoubleDelta,
222            ColumnCodec::Delta,
223            ColumnCodec::Lz4,
224            ColumnCodec::Zstd,
225            ColumnCodec::Raw,
226        ] {
227            let json = sonic_rs::to_string(&codec).unwrap();
228            let back: ColumnCodec = sonic_rs::from_str(&json).unwrap();
229            assert_eq!(codec, back, "serde roundtrip failed for {codec}");
230        }
231    }
232
233    #[test]
234    fn column_statistics_i64() {
235        let values = vec![10i64, 20, 30, 40, 50];
236        let stats = ColumnStatistics::from_i64(&values, ResolvedColumnCodec::Delta, 12);
237        assert_eq!(stats.count, 5);
238        assert_eq!(stats.min, Some(10.0));
239        assert_eq!(stats.max, Some(50.0));
240        assert_eq!(stats.sum, Some(150.0));
241        assert_eq!(stats.uncompressed_bytes, 40);
242        assert_eq!(stats.compressed_bytes, 12);
243    }
244
245    #[test]
246    fn column_statistics_f64() {
247        let values = vec![1.5f64, 2.5, 3.5];
248        let stats = ColumnStatistics::from_f64(&values, ResolvedColumnCodec::Gorilla, 8);
249        assert_eq!(stats.count, 3);
250        assert_eq!(stats.min, Some(1.5));
251        assert_eq!(stats.max, Some(3.5));
252        assert_eq!(stats.sum, Some(7.5));
253    }
254
255    #[test]
256    fn column_statistics_symbols() {
257        let values = vec![0u32, 1, 2, 0, 1];
258        let stats = ColumnStatistics::from_symbols(&values, 3, ResolvedColumnCodec::Raw, 20);
259        assert_eq!(stats.count, 5);
260        assert_eq!(stats.cardinality, Some(3));
261        assert!(stats.min.is_none());
262    }
263
264    #[test]
265    fn compression_ratio_calculation() {
266        let stats = ColumnStatistics {
267            codec: ResolvedColumnCodec::Delta,
268            count: 100,
269            min: None,
270            max: None,
271            sum: None,
272            cardinality: None,
273            compressed_bytes: 200,
274            uncompressed_bytes: 800,
275        };
276        assert!((stats.compression_ratio() - 4.0).abs() < f64::EPSILON);
277    }
278
279    // ── parse_codec_name snapshot tests ───────────────────────────────────────
280
281    #[test]
282    fn parse_codec_name_all_canonical_round_trip() {
283        let cases: &[(&str, ColumnCodec)] = &[
284            ("auto", ColumnCodec::Auto),
285            ("alp_fastlanes_lz4", ColumnCodec::AlpFastLanesLz4),
286            ("alp_rd_lz4", ColumnCodec::AlpRdLz4),
287            ("pcodec_lz4", ColumnCodec::PcodecLz4),
288            ("delta_fastlanes_lz4", ColumnCodec::DeltaFastLanesLz4),
289            ("fastlanes_lz4", ColumnCodec::FastLanesLz4),
290            ("fsst_lz4", ColumnCodec::FsstLz4),
291            ("alp_fastlanes_rans", ColumnCodec::AlpFastLanesRans),
292            ("delta_fastlanes_rans", ColumnCodec::DeltaFastLanesRans),
293            ("fsst_rans", ColumnCodec::FsstRans),
294            ("gorilla", ColumnCodec::Gorilla),
295            ("double_delta", ColumnCodec::DoubleDelta),
296            ("delta", ColumnCodec::Delta),
297            ("lz4", ColumnCodec::Lz4),
298            ("zstd", ColumnCodec::Zstd),
299            ("raw", ColumnCodec::Raw),
300        ];
301        for &(name, expected) in cases {
302            let parsed = parse_codec_name(name)
303                .unwrap_or_else(|e| panic!("parse_codec_name({name:?}) failed: {e}"));
304            assert_eq!(parsed, expected, "parse mismatch for {name:?}");
305            assert_eq!(
306                parsed.as_str(),
307                name,
308                "as_str() round-trip mismatch for {name:?}"
309            );
310        }
311        assert_eq!(
312            cases.len(),
313            16,
314            "variant count changed — update parse_codec_name"
315        );
316    }
317
318    #[test]
319    fn parse_codec_name_rejects_non_canonical() {
320        let bad: &[&str] = &[
321            "LZ4",
322            "Lz4",
323            "GORILLA",
324            "Gorilla",
325            "FastLanes",
326            "fast_lanes",
327            "fast-lanes",
328            "FSST",
329            "alp-fastlanes-lz4",
330            "ALP_FASTLANES_LZ4",
331            "Delta_FastLanes_LZ4",
332            "ZSTD",
333            "RAW",
334            "",
335            " lz4",
336            "lz4 ",
337            "unknown",
338            "pcodec",
339        ];
340        for &name in bad {
341            let result = parse_codec_name(name);
342            assert!(
343                result.is_err(),
344                "parse_codec_name({name:?}) should have been rejected but returned Ok"
345            );
346            let err = result.unwrap_err();
347            assert!(
348                matches!(err, crate::error::CodecError::UnknownCodec { .. }),
349                "wrong error variant for {name:?}: {err}"
350            );
351        }
352    }
353
354    #[test]
355    fn parse_codec_name_error_message_content() {
356        let err = parse_codec_name("BadCodec").unwrap_err();
357        let msg = err.to_string();
358        assert!(
359            msg.contains("BadCodec"),
360            "error message should contain the bad name: {msg}"
361        );
362        assert!(
363            msg.contains("lz4"),
364            "error message should list at least one valid name: {msg}"
365        );
366    }
367}