mlt_core/frames/v01/id/
optimizer.rs1use crate::MltError;
2use crate::v01::{
3 DataProfile, EncodedId, IdEncoder, IdValues, IdWidth, IntEncoder, LogicalEncoder,
4 PhysicalEncoder,
5};
6
7#[derive(Debug, Clone, PartialEq)]
20pub struct IdProfile {
21 candidates: Vec<IntEncoder>,
25}
26
27impl IdProfile {
28 #[doc(hidden)]
29 #[must_use]
30 pub fn new(candidates: Vec<IntEncoder>) -> Self {
31 Self { candidates }
32 }
33
34 #[must_use]
36 pub fn from_sample(decoded: &IdValues) -> Self {
37 let ids = &decoded.0;
38 let Ok((_, _, id_width)) = single_pass_statistics(ids) else {
39 return Self {
40 candidates: vec![IntEncoder::varint()],
41 };
42 };
43 Self {
44 candidates: pruned_candidates(ids, id_width),
45 }
46 }
47
48 #[must_use]
52 pub fn merge(mut self, other: &Self) -> Self {
53 for &enc in &other.candidates {
54 if !self.candidates.contains(&enc) {
55 self.candidates.push(enc);
56 }
57 }
58 self
59 }
60}
61
62fn optimize(decoded: &IdValues) -> IdEncoder {
67 let ids = &decoded.0;
68 let (is_sequential, is_constant, id_width) = match single_pass_statistics(ids) {
69 Ok(stats) => stats,
70 Err(default_enc) => return default_enc,
71 };
72
73 if ids.len() <= 2 {
74 return IdEncoder::new(LogicalEncoder::None, id_width);
75 }
76
77 if is_sequential && ids.len() > 4 {
78 return IdEncoder::new(LogicalEncoder::DeltaRle, id_width);
79 }
80
81 if is_constant {
82 return IdEncoder::new(LogicalEncoder::Rle, id_width);
83 }
84
85 let candidates = pruned_candidates(ids, id_width);
86 let logical = compete_with(ids, id_width, &candidates);
87 IdEncoder::new(logical, id_width)
88}
89
90fn apply_profile(decoded: &IdValues, profile: &IdProfile) -> IdEncoder {
96 let ids = &decoded.0;
97 let (is_sequential, is_constant, id_width) = match single_pass_statistics(ids) {
98 Ok(stats) => stats,
99 Err(default_enc) => return default_enc,
100 };
101
102 if ids.len() <= 2 {
103 return IdEncoder::new(LogicalEncoder::None, id_width);
104 }
105
106 if is_sequential && ids.len() > 4 {
107 return IdEncoder::new(LogicalEncoder::DeltaRle, id_width);
108 }
109
110 if is_constant {
111 return IdEncoder::new(LogicalEncoder::Rle, id_width);
112 }
113
114 let logical = compete_with(ids, id_width, &profile.candidates);
115 IdEncoder::new(logical, id_width)
116}
117
118fn single_pass_statistics(ids: &[Option<u64>]) -> Result<(bool, bool, IdWidth), IdEncoder> {
123 let mut has_nulls = false;
124 let mut is_sequential = true;
125 let mut is_constant = true;
126
127 let mut ids_iter = ids.iter();
128 let first_non_null = loop {
129 match ids_iter.next() {
130 Some(Some(id)) => break *id,
131 Some(None) => has_nulls = true,
132 None => return Err(IdEncoder::new(LogicalEncoder::None, IdWidth::Id32)),
133 }
134 };
135
136 let mut max_value = first_non_null;
137 let mut prev_non_null = first_non_null;
138
139 for &id in ids_iter {
140 match id {
141 None => has_nulls = true,
142 Some(v) => {
143 max_value = max_value.max(v);
144 if v != prev_non_null.wrapping_add(1) {
145 is_sequential = false;
146 }
147 if v != first_non_null {
148 is_constant = false;
149 }
150 prev_non_null = v;
151 }
152 }
153 }
154
155 Ok((
156 is_sequential,
157 is_constant,
158 deduce_width(has_nulls, max_value),
159 ))
160}
161
162#[inline]
164fn deduce_width(has_nulls: bool, max_value: u64) -> IdWidth {
165 let fits_u32 = u32::try_from(max_value).is_ok();
166 match (has_nulls, fits_u32) {
167 (false, true) => IdWidth::Id32,
168 (true, true) => IdWidth::OptId32,
169 (false, false) => IdWidth::Id64,
170 (true, false) => IdWidth::OptId64,
171 }
172}
173
174fn pruned_candidates(ids: &[Option<u64>], id_width: IdWidth) -> Vec<IntEncoder> {
179 match id_width {
180 IdWidth::Id32 | IdWidth::OptId32 => {
181 #[expect(
182 clippy::cast_possible_truncation,
183 reason = "width was deduced as ≤ u32::MAX so truncation is safe"
184 )]
185 let vals: Vec<u32> = ids.iter().flatten().map(|&v| v as u32).collect();
186 filter_varint(&DataProfile::prune_candidates::<i32>(&vals))
187 }
188 IdWidth::Id64 | IdWidth::OptId64 => {
189 let vals: Vec<u64> = ids.iter().flatten().copied().collect();
190 filter_varint(&DataProfile::prune_candidates::<i64>(&vals))
191 }
192 }
193}
194
195fn compete_with(
198 ids: &[Option<u64>],
199 id_width: IdWidth,
200 candidates: &[IntEncoder],
201) -> LogicalEncoder {
202 let candidates = if candidates.is_empty() {
203 &[IntEncoder::varint()][..]
204 } else {
205 candidates
206 };
207
208 match id_width {
209 IdWidth::Id32 | IdWidth::OptId32 => {
210 #[expect(
211 clippy::cast_possible_truncation,
212 reason = "width was deduced as ≤ u32::MAX so truncation is safe"
213 )]
214 let vals: Vec<u32> = ids.iter().flatten().map(|&v| v as u32).collect();
215 DataProfile::compete_u32(candidates, &vals).logical
216 }
217 IdWidth::Id64 | IdWidth::OptId64 => {
218 let vals: Vec<u64> = ids.iter().flatten().copied().collect();
219 DataProfile::compete_u64(candidates, &vals).logical
220 }
221 }
222}
223
224fn filter_varint(candidates: &[IntEncoder]) -> Vec<IntEncoder> {
227 let filtered: Vec<IntEncoder> = candidates
228 .iter()
229 .copied()
230 .filter(|enc| enc.physical == PhysicalEncoder::VarInt)
231 .collect();
232 if filtered.is_empty() {
233 vec![IntEncoder::varint()]
234 } else {
235 filtered
236 }
237}
238
239impl IdValues {
240 pub fn encode(self, encoder: IdEncoder) -> Result<Option<EncodedId>, MltError> {
243 if self.0.is_empty() {
244 Ok(None)
245 } else {
246 Ok(Some(EncodedId::encode(&self, encoder)?))
247 }
248 }
249
250 pub fn encode_with_profile(
252 &self,
253 profile: &IdProfile,
254 ) -> Result<(Option<EncodedId>, Option<IdEncoder>), MltError> {
255 if self.0.is_empty() {
256 return Ok((None, None));
257 }
258 let enc = apply_profile(self, profile);
259 let encoded = EncodedId::encode(self, enc)?;
260 Ok((Some(encoded), Some(enc)))
261 }
262
263 pub fn encode_auto(self) -> Result<(Option<EncodedId>, Option<IdEncoder>), MltError> {
265 if self.0.is_empty() {
266 return Ok((None, None));
267 }
268 let enc = optimize(&self);
269 let encoded = EncodedId::encode(&self, enc)?;
270 Ok((Some(encoded), Some(enc)))
271 }
272}