1use crate::{ColumnCodec, ColumnTypeHint};
17
18use crate::CODEC_SAMPLE_SIZE;
19
/// Minimum number of values at which the data-driven detectors in this file
/// (`detect_i64_codec`, `detect_f64_codec`) consider the cascaded
/// `*FastLanesLz4` codecs; shorter inputs fall back to `Delta`/`DoubleDelta`
/// or `Gorilla`.
const CASCADE_THRESHOLD: usize = 128;
23
24pub fn detect_codec(codec: ColumnCodec, type_hint: ColumnTypeHint) -> ColumnCodec {
31 if codec != ColumnCodec::Auto {
32 return codec;
33 }
34
35 match type_hint {
39 ColumnTypeHint::Timestamp => ColumnCodec::DeltaFastLanesLz4,
40 ColumnTypeHint::Float64 => ColumnCodec::AlpFastLanesLz4,
41 ColumnTypeHint::Int64 => ColumnCodec::DeltaFastLanesLz4,
42 ColumnTypeHint::Symbol => ColumnCodec::FastLanesLz4,
43 ColumnTypeHint::String => ColumnCodec::FsstLz4,
44 }
45}
46
47pub fn detect_i64_codec(values: &[i64]) -> ColumnCodec {
52 if values.len() < 2 {
53 return ColumnCodec::Delta;
54 }
55
56 if values.len() >= CASCADE_THRESHOLD {
58 return ColumnCodec::DeltaFastLanesLz4;
59 }
60
61 let sample_end = values.len().min(CODEC_SAMPLE_SIZE);
63 let sample = &values[..sample_end];
64
65 let mut zero_dod_count = 0usize;
66 let mut prev_delta: Option<i64> = None;
67
68 for i in 1..sample.len() {
69 let delta = sample[i] - sample[i - 1];
70 if let Some(pd) = prev_delta
71 && delta == pd
72 {
73 zero_dod_count += 1;
74 }
75 prev_delta = Some(delta);
76 }
77
78 let total_deltas = sample.len() - 1;
79 let constant_rate_ratio = zero_dod_count as f64 / total_deltas.max(1) as f64;
80
81 if constant_rate_ratio > 0.8 {
82 ColumnCodec::DoubleDelta
83 } else {
84 ColumnCodec::Delta
85 }
86}
87
88pub fn detect_f64_codec(values: &[f64]) -> ColumnCodec {
93 if values.len() < 2 {
94 return ColumnCodec::Gorilla;
95 }
96
97 let use_cascade = values.len() >= CASCADE_THRESHOLD;
98
99 if use_cascade {
100 let encodability = crate::alp::alp_encodability(values);
102 if encodability > 0.95 {
103 return ColumnCodec::AlpFastLanesLz4;
104 }
105 }
106
107 ColumnCodec::Gorilla
109}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// An explicitly requested codec is returned unchanged, whatever the type hint.
    #[test]
    fn explicit_codec_passthrough() {
        assert_eq!(
            detect_codec(ColumnCodec::Lz4, ColumnTypeHint::Timestamp),
            ColumnCodec::Lz4
        );
        assert_eq!(
            detect_codec(ColumnCodec::Zstd, ColumnTypeHint::Float64),
            ColumnCodec::Zstd
        );
    }

    #[test]
    fn auto_timestamp() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::Timestamp),
            ColumnCodec::DeltaFastLanesLz4
        );
    }

    #[test]
    fn auto_float64() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::Float64),
            ColumnCodec::AlpFastLanesLz4
        );
    }

    #[test]
    fn auto_int64() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::Int64),
            ColumnCodec::DeltaFastLanesLz4
        );
    }

    #[test]
    fn auto_symbol() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::Symbol),
            ColumnCodec::FastLanesLz4
        );
    }

    #[test]
    fn auto_string() {
        assert_eq!(
            detect_codec(ColumnCodec::Auto, ColumnTypeHint::String),
            ColumnCodec::FsstLz4
        );
    }

    /// Inputs with at least `CASCADE_THRESHOLD` values always get the cascaded codec.
    #[test]
    fn detect_large_i64_uses_cascade() {
        let values: Vec<i64> = (0..1000).map(|i| i * 100).collect();
        assert_eq!(detect_i64_codec(&values), ColumnCodec::DeltaFastLanesLz4);

        let timestamps: Vec<i64> = (0..1000).map(|i| 1_700_000_000_000 + i * 10_000).collect();
        assert_eq!(
            detect_i64_codec(&timestamps),
            ColumnCodec::DeltaFastLanesLz4
        );
    }

    /// Inputs shorter than `CASCADE_THRESHOLD` go through the delta-of-delta heuristic.
    #[test]
    fn detect_small_i64_uses_legacy() {
        // Every consecutive delta is 100.
        let constant_rate: Vec<i64> = (0..50).map(|i| i * 100).collect();
        assert_eq!(detect_i64_codec(&constant_rate), ColumnCodec::DoubleDelta);

        // No two adjacent deltas are equal (2, 4, 8, 7, 8).
        let varying: Vec<i64> = vec![1, 3, 7, 15, 22, 30];
        assert_eq!(detect_i64_codec(&varying), ColumnCodec::Delta);
    }

    #[test]
    fn detect_large_f64_decimal_uses_alp() {
        let values: Vec<f64> = (0..1000).map(|i| i as f64 * 0.1).collect();
        assert_eq!(detect_f64_codec(&values), ColumnCodec::AlpFastLanesLz4);
    }

    #[test]
    fn detect_large_f64_irrational_uses_gorilla() {
        let values: Vec<f64> = (1..1000).map(|i| std::f64::consts::PI * i as f64).collect();
        assert_eq!(detect_f64_codec(&values), ColumnCodec::Gorilla);
    }

    /// Below `CASCADE_THRESHOLD` the f64 detector returns Gorilla without consulting ALP.
    #[test]
    fn detect_small_f64_uses_gorilla() {
        let values: Vec<f64> = (0..50).map(|i| i as f64 * 0.1).collect();
        assert_eq!(detect_f64_codec(&values), ColumnCodec::Gorilla);
    }

    /// Empty and single-element inputs take the early-return defaults.
    #[test]
    fn small_sample() {
        assert_eq!(detect_i64_codec(&[]), ColumnCodec::Delta);
        assert_eq!(detect_i64_codec(&[42]), ColumnCodec::Delta);
        assert_eq!(detect_f64_codec(&[]), ColumnCodec::Gorilla);
    }
}