1use crate::{ColumnCodec, ColumnTypeHint};
15
16use crate::CODEC_SAMPLE_SIZE;
17
/// Minimum number of values at which the data-driven detectors switch to the
/// cascaded codecs: `detect_i64_codec` returns `DeltaFastLanesLz4` and
/// `detect_f64_codec` starts considering `AlpFastLanesLz4` once the input is
/// at least this long.
const CASCADE_THRESHOLD: usize = 128;
21
22pub fn detect_codec(codec: ColumnCodec, type_hint: ColumnTypeHint) -> ColumnCodec {
29 if codec != ColumnCodec::Auto {
30 return codec;
31 }
32
33 match type_hint {
37 ColumnTypeHint::Timestamp => ColumnCodec::DeltaFastLanesLz4,
38 ColumnTypeHint::Float64 => ColumnCodec::AlpFastLanesLz4,
39 ColumnTypeHint::Int64 => ColumnCodec::DeltaFastLanesLz4,
40 ColumnTypeHint::Symbol => ColumnCodec::FastLanesLz4,
41 ColumnTypeHint::String => ColumnCodec::FsstLz4,
42 }
43}
44
45pub fn detect_i64_codec(values: &[i64]) -> ColumnCodec {
50 if values.len() < 2 {
51 return ColumnCodec::Delta;
52 }
53
54 if values.len() >= CASCADE_THRESHOLD {
56 return ColumnCodec::DeltaFastLanesLz4;
57 }
58
59 let sample_end = values.len().min(CODEC_SAMPLE_SIZE);
61 let sample = &values[..sample_end];
62
63 let mut zero_dod_count = 0usize;
64 let mut prev_delta: Option<i64> = None;
65
66 for i in 1..sample.len() {
67 let delta = sample[i] - sample[i - 1];
68 if let Some(pd) = prev_delta
69 && delta == pd
70 {
71 zero_dod_count += 1;
72 }
73 prev_delta = Some(delta);
74 }
75
76 let total_deltas = sample.len() - 1;
77 let constant_rate_ratio = zero_dod_count as f64 / total_deltas.max(1) as f64;
78
79 if constant_rate_ratio > 0.8 {
80 ColumnCodec::DoubleDelta
81 } else {
82 ColumnCodec::Delta
83 }
84}
85
86pub fn detect_f64_codec(values: &[f64]) -> ColumnCodec {
91 if values.len() < 2 {
92 return ColumnCodec::Gorilla;
93 }
94
95 let use_cascade = values.len() >= CASCADE_THRESHOLD;
96
97 if use_cascade {
98 let encodability = crate::alp::alp_encodability(values);
100 if encodability > 0.95 {
101 return ColumnCodec::AlpFastLanesLz4;
102 }
103 }
104
105 ColumnCodec::Gorilla
107}
108
#[cfg(test)]
mod tests {
    use super::*;

    // --- detect_codec: explicit codecs pass through, Auto resolves per type hint ---

    #[test]
    fn explicit_codec_passthrough() {
        assert_eq!(
            detect_codec(ColumnCodec::Lz4, ColumnTypeHint::Timestamp),
            ColumnCodec::Lz4
        );
        assert_eq!(
            detect_codec(ColumnCodec::Zstd, ColumnTypeHint::Float64),
            ColumnCodec::Zstd
        );
    }

    #[test]
    fn auto_timestamp() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::Timestamp),
            ColumnCodec::DeltaFastLanesLz4
        );
    }

    #[test]
    fn auto_float64() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::Float64),
            ColumnCodec::AlpFastLanesLz4
        );
    }

    #[test]
    fn auto_int64() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::Int64),
            ColumnCodec::DeltaFastLanesLz4
        );
    }

    #[test]
    fn auto_symbol() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::Symbol),
            ColumnCodec::FastLanesLz4
        );
    }

    #[test]
    fn auto_string() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::String),
            ColumnCodec::FsstLz4
        );
    }

    // --- detect_i64_codec ---

    #[test]
    fn detect_large_i64_uses_cascade() {
        // Inputs at or above CASCADE_THRESHOLD skip sampling entirely.
        let values: Vec<i64> = (0..1000).map(|i| i * 100).collect();
        assert_eq!(detect_i64_codec(&values), ColumnCodec::DeltaFastLanesLz4);

        let timestamps: Vec<i64> = (0..1000).map(|i| 1_700_000_000_000 + i * 10_000).collect();
        assert_eq!(
            detect_i64_codec(&timestamps),
            ColumnCodec::DeltaFastLanesLz4
        );
    }

    #[test]
    fn detect_small_i64_uses_legacy() {
        // Below the threshold a constant step yields DoubleDelta.
        let constant_rate: Vec<i64> = (0..50).map(|i| i * 100).collect();
        assert_eq!(detect_i64_codec(&constant_rate), ColumnCodec::DoubleDelta);

        // Irregular steps fall back to plain Delta.
        let varying: Vec<i64> = vec![1, 3, 7, 15, 22, 30];
        assert_eq!(detect_i64_codec(&varying), ColumnCodec::Delta);
    }

    // --- detect_f64_codec ---

    #[test]
    fn detect_large_f64_decimal_uses_alp() {
        let values: Vec<f64> = (0..1000).map(|i| i as f64 * 0.1).collect();
        assert_eq!(detect_f64_codec(&values), ColumnCodec::AlpFastLanesLz4);
    }

    #[test]
    fn detect_large_f64_irrational_uses_gorilla() {
        let values: Vec<f64> = (1..1000).map(|i| std::f64::consts::PI * i as f64).collect();
        assert_eq!(detect_f64_codec(&values), ColumnCodec::Gorilla);
    }

    #[test]
    fn detect_small_f64_uses_gorilla() {
        // Below the threshold the ALP probe is never consulted.
        let values: Vec<f64> = (0..50).map(|i| i as f64 * 0.1).collect();
        assert_eq!(detect_f64_codec(&values), ColumnCodec::Gorilla);
    }

    // --- degenerate inputs ---

    #[test]
    fn small_sample() {
        assert_eq!(detect_i64_codec(&[]), ColumnCodec::Delta);
        assert_eq!(detect_i64_codec(&[42]), ColumnCodec::Delta);
        assert_eq!(detect_f64_codec(&[]), ColumnCodec::Gorilla);
    }
}