/// Code pulled in verbatim from `$OUT_DIR/lance.encodings.rs` via `include!`.
///
/// NOTE(review): the included file is presumably emitted by a protobuf code
/// generator during the build — confirm against the crate's build script.
pub mod pb {
    // The included file is not hand-written in this module, so the lints below
    // are silenced for the whole module rather than fixed in the included code.
    #![allow(clippy::all)]
    #![allow(non_upper_case_globals)]
    #![allow(non_camel_case_types)]
    #![allow(non_snake_case)]
    #![allow(unused)]
    #![allow(improper_ctypes)]
    #![allow(clippy::upper_case_acronyms)]
    #![allow(clippy::use_self)]
    // `OUT_DIR` is read at compile time through `env!`.
    include!(concat!(env!("OUT_DIR"), "/lance.encodings.rs"));
}
16
17use pb::{
18 array_encoding::ArrayEncoding as ArrayEncodingEnum,
19 buffer::BufferType,
20 full_zip_layout,
21 nullable::{AllNull, NoNull, Nullability, SomeNull},
22 page_layout::Layout,
23 AllNullLayout, ArrayEncoding, Binary, Bitpacked, BitpackedForNonNeg, Block, Dictionary,
24 FixedSizeBinary, FixedSizeList, Flat, Fsst, InlineBitpacking, MiniBlockLayout, Nullable,
25 OutOfLineBitpacking, PackedStruct, PackedStructFixedWidthMiniBlock, PageLayout, RepDefLayer,
26 Rle, Variable,
27};
28
29use crate::{encodings::physical::block::CompressionConfig, repdef::DefinitionInterpretation};
30
31use self::pb::Constant;
32
/// Field-less type that serves as a namespace for the associated functions
/// building `pb` encoding and page-layout messages.
pub struct ProtobufUtils {}
35
36impl ProtobufUtils {
37 pub fn constant(value: Vec<u8>) -> ArrayEncoding {
38 ArrayEncoding {
39 array_encoding: Some(ArrayEncodingEnum::Constant(Constant {
40 value: value.into(),
41 })),
42 }
43 }
44
45 pub fn basic_all_null_encoding() -> ArrayEncoding {
46 ArrayEncoding {
47 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
48 nullability: Some(Nullability::AllNulls(AllNull {})),
49 }))),
50 }
51 }
52
53 pub fn basic_some_null_encoding(
54 validity: ArrayEncoding,
55 values: ArrayEncoding,
56 ) -> ArrayEncoding {
57 ArrayEncoding {
58 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
59 nullability: Some(Nullability::SomeNulls(Box::new(SomeNull {
60 validity: Some(Box::new(validity)),
61 values: Some(Box::new(values)),
62 }))),
63 }))),
64 }
65 }
66
67 pub fn basic_no_null_encoding(values: ArrayEncoding) -> ArrayEncoding {
68 ArrayEncoding {
69 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
70 nullability: Some(Nullability::NoNulls(Box::new(NoNull {
71 values: Some(Box::new(values)),
72 }))),
73 }))),
74 }
75 }
76
77 pub fn block(scheme: &str) -> ArrayEncoding {
78 ArrayEncoding {
79 array_encoding: Some(ArrayEncodingEnum::Block(Block {
80 scheme: scheme.to_string(),
81 })),
82 }
83 }
84
85 pub fn flat_encoding(
86 bits_per_value: u64,
87 buffer_index: u32,
88 compression: Option<CompressionConfig>,
89 ) -> ArrayEncoding {
90 ArrayEncoding {
91 array_encoding: Some(ArrayEncodingEnum::Flat(Flat {
92 bits_per_value,
93 buffer: Some(pb::Buffer {
94 buffer_index,
95 buffer_type: BufferType::Page as i32,
96 }),
97 compression: compression.map(|compression_config| pb::Compression {
98 scheme: compression_config.scheme.to_string(),
99 level: compression_config.level,
100 }),
101 })),
102 }
103 }
104
105 pub fn fsl_encoding(dimension: u64, items: ArrayEncoding, has_validity: bool) -> ArrayEncoding {
106 ArrayEncoding {
107 array_encoding: Some(ArrayEncodingEnum::FixedSizeList(Box::new(FixedSizeList {
108 dimension: dimension.try_into().unwrap(),
109 items: Some(Box::new(items)),
110 has_validity,
111 }))),
112 }
113 }
114
115 pub fn bitpacked_encoding(
116 compressed_bits_per_value: u64,
117 uncompressed_bits_per_value: u64,
118 buffer_index: u32,
119 signed: bool,
120 ) -> ArrayEncoding {
121 ArrayEncoding {
122 array_encoding: Some(ArrayEncodingEnum::Bitpacked(Bitpacked {
123 compressed_bits_per_value,
124 buffer: Some(pb::Buffer {
125 buffer_index,
126 buffer_type: BufferType::Page as i32,
127 }),
128 uncompressed_bits_per_value,
129 signed,
130 })),
131 }
132 }
133
134 pub fn bitpacked_for_non_neg_encoding(
135 compressed_bits_per_value: u64,
136 uncompressed_bits_per_value: u64,
137 buffer_index: u32,
138 ) -> ArrayEncoding {
139 ArrayEncoding {
140 array_encoding: Some(ArrayEncodingEnum::BitpackedForNonNeg(BitpackedForNonNeg {
141 compressed_bits_per_value,
142 buffer: Some(pb::Buffer {
143 buffer_index,
144 buffer_type: BufferType::Page as i32,
145 }),
146 uncompressed_bits_per_value,
147 })),
148 }
149 }
150 pub fn inline_bitpacking(uncompressed_bits_per_value: u64) -> ArrayEncoding {
151 ArrayEncoding {
152 array_encoding: Some(ArrayEncodingEnum::InlineBitpacking(InlineBitpacking {
153 uncompressed_bits_per_value,
154 })),
155 }
156 }
157 pub fn out_of_line_bitpacking(
158 uncompressed_bits_per_value: u64,
159 compressed_bits_per_value: u64,
160 ) -> ArrayEncoding {
161 ArrayEncoding {
162 array_encoding: Some(ArrayEncodingEnum::OutOfLineBitpacking(
163 OutOfLineBitpacking {
164 uncompressed_bits_per_value,
165 compressed_bits_per_value,
166 },
167 )),
168 }
169 }
170
171 pub fn variable(bits_per_offset: u8) -> ArrayEncoding {
172 ArrayEncoding {
173 array_encoding: Some(ArrayEncodingEnum::Variable(Variable {
174 bits_per_offset: bits_per_offset as u32,
175 })),
176 }
177 }
178
179 pub fn fsst(data: ArrayEncoding, symbol_table: Vec<u8>) -> ArrayEncoding {
183 ArrayEncoding {
184 array_encoding: Some(ArrayEncodingEnum::Fsst(Box::new(Fsst {
185 binary: Some(Box::new(data)),
186 symbol_table: symbol_table.into(),
187 }))),
188 }
189 }
190
191 pub fn rle(bits_per_value: u64) -> ArrayEncoding {
192 ArrayEncoding {
193 array_encoding: Some(ArrayEncodingEnum::Rle(Rle { bits_per_value })),
194 }
195 }
196
197 pub fn byte_stream_split(bits_per_value: u64) -> ArrayEncoding {
198 ArrayEncoding {
199 array_encoding: Some(ArrayEncodingEnum::ByteStreamSplit(pb::ByteStreamSplit {
200 bits_per_value,
201 })),
202 }
203 }
204
205 pub fn general_mini_block(
206 inner: ArrayEncoding,
207 compression: CompressionConfig,
208 ) -> ArrayEncoding {
209 ArrayEncoding {
210 array_encoding: Some(ArrayEncodingEnum::GeneralMiniBlock(Box::new(
211 pb::GeneralMiniBlock {
212 inner: Some(Box::new(inner)),
213 compression: Some(pb::Compression {
214 scheme: compression.scheme.to_string(),
215 level: compression.level,
216 }),
217 },
218 ))),
219 }
220 }
221
222 pub fn packed_struct(
223 child_encodings: Vec<ArrayEncoding>,
224 packed_buffer_index: u32,
225 ) -> ArrayEncoding {
226 ArrayEncoding {
227 array_encoding: Some(ArrayEncodingEnum::PackedStruct(PackedStruct {
228 inner: child_encodings,
229 buffer: Some(pb::Buffer {
230 buffer_index: packed_buffer_index,
231 buffer_type: BufferType::Page as i32,
232 }),
233 })),
234 }
235 }
236
237 pub fn packed_struct_fixed_width_mini_block(
238 data: ArrayEncoding,
239 bits_per_values: Vec<u32>,
240 ) -> ArrayEncoding {
241 ArrayEncoding {
242 array_encoding: Some(ArrayEncodingEnum::PackedStructFixedWidthMiniBlock(
243 Box::new(PackedStructFixedWidthMiniBlock {
244 flat: Some(Box::new(data)),
245 bits_per_values,
246 }),
247 )),
248 }
249 }
250
251 pub fn binary(
252 indices_encoding: ArrayEncoding,
253 bytes_encoding: ArrayEncoding,
254 null_adjustment: u64,
255 ) -> ArrayEncoding {
256 ArrayEncoding {
257 array_encoding: Some(ArrayEncodingEnum::Binary(Box::new(Binary {
258 bytes: Some(Box::new(bytes_encoding)),
259 indices: Some(Box::new(indices_encoding)),
260 null_adjustment,
261 }))),
262 }
263 }
264
265 pub fn dict_encoding(
266 indices: ArrayEncoding,
267 items: ArrayEncoding,
268 num_items: u32,
269 ) -> ArrayEncoding {
270 ArrayEncoding {
271 array_encoding: Some(ArrayEncodingEnum::Dictionary(Box::new(Dictionary {
272 indices: Some(Box::new(indices)),
273 items: Some(Box::new(items)),
274 num_dictionary_items: num_items,
275 }))),
276 }
277 }
278
279 pub fn fixed_size_binary(data: ArrayEncoding, byte_width: u32) -> ArrayEncoding {
280 ArrayEncoding {
281 array_encoding: Some(ArrayEncodingEnum::FixedSizeBinary(Box::new(
282 FixedSizeBinary {
283 bytes: Some(Box::new(data)),
284 byte_width,
285 },
286 ))),
287 }
288 }
289
290 fn def_inter_to_repdef_layer(def: DefinitionInterpretation) -> i32 {
291 match def {
292 DefinitionInterpretation::AllValidItem => RepDefLayer::RepdefAllValidItem as i32,
293 DefinitionInterpretation::AllValidList => RepDefLayer::RepdefAllValidList as i32,
294 DefinitionInterpretation::NullableItem => RepDefLayer::RepdefNullableItem as i32,
295 DefinitionInterpretation::NullableList => RepDefLayer::RepdefNullableList as i32,
296 DefinitionInterpretation::EmptyableList => RepDefLayer::RepdefEmptyableList as i32,
297 DefinitionInterpretation::NullableAndEmptyableList => {
298 RepDefLayer::RepdefNullAndEmptyList as i32
299 }
300 }
301 }
302
303 pub fn repdef_layer_to_def_interp(layer: i32) -> DefinitionInterpretation {
304 let layer = RepDefLayer::try_from(layer).unwrap();
305 match layer {
306 RepDefLayer::RepdefAllValidItem => DefinitionInterpretation::AllValidItem,
307 RepDefLayer::RepdefAllValidList => DefinitionInterpretation::AllValidList,
308 RepDefLayer::RepdefNullableItem => DefinitionInterpretation::NullableItem,
309 RepDefLayer::RepdefNullableList => DefinitionInterpretation::NullableList,
310 RepDefLayer::RepdefEmptyableList => DefinitionInterpretation::EmptyableList,
311 RepDefLayer::RepdefNullAndEmptyList => {
312 DefinitionInterpretation::NullableAndEmptyableList
313 }
314 RepDefLayer::RepdefUnspecified => panic!("Unspecified repdef layer"),
315 }
316 }
317
318 #[allow(clippy::too_many_arguments)]
319 pub fn miniblock_layout(
320 rep_encoding: Option<ArrayEncoding>,
321 def_encoding: Option<ArrayEncoding>,
322 value_encoding: ArrayEncoding,
323 repetition_index_depth: u32,
324 num_buffers: u64,
325 dictionary_encoding: Option<(ArrayEncoding, u64)>,
326 def_meaning: &[DefinitionInterpretation],
327 num_items: u64,
328 ) -> PageLayout {
329 assert!(!def_meaning.is_empty());
330 let (dictionary, num_dictionary_items) = dictionary_encoding
331 .map(|(d, i)| (Some(d), i))
332 .unwrap_or((None, 0));
333 PageLayout {
334 layout: Some(Layout::MiniBlockLayout(MiniBlockLayout {
335 def_compression: def_encoding,
336 rep_compression: rep_encoding,
337 value_compression: Some(value_encoding),
338 repetition_index_depth,
339 num_buffers,
340 dictionary,
341 num_dictionary_items,
342 layers: def_meaning
343 .iter()
344 .map(|&def| Self::def_inter_to_repdef_layer(def))
345 .collect(),
346 num_items,
347 })),
348 }
349 }
350
351 fn full_zip_layout(
352 bits_rep: u8,
353 bits_def: u8,
354 details: full_zip_layout::Details,
355 value_encoding: ArrayEncoding,
356 def_meaning: &[DefinitionInterpretation],
357 num_items: u32,
358 num_visible_items: u32,
359 ) -> PageLayout {
360 PageLayout {
361 layout: Some(Layout::FullZipLayout(pb::FullZipLayout {
362 bits_rep: bits_rep as u32,
363 bits_def: bits_def as u32,
364 details: Some(details),
365 value_compression: Some(value_encoding),
366 num_items,
367 num_visible_items,
368 layers: def_meaning
369 .iter()
370 .map(|&def| Self::def_inter_to_repdef_layer(def))
371 .collect(),
372 })),
373 }
374 }
375
376 pub fn fixed_full_zip_layout(
377 bits_rep: u8,
378 bits_def: u8,
379 bits_per_value: u32,
380 value_encoding: ArrayEncoding,
381 def_meaning: &[DefinitionInterpretation],
382 num_items: u32,
383 num_visible_items: u32,
384 ) -> PageLayout {
385 Self::full_zip_layout(
386 bits_rep,
387 bits_def,
388 full_zip_layout::Details::BitsPerValue(bits_per_value),
389 value_encoding,
390 def_meaning,
391 num_items,
392 num_visible_items,
393 )
394 }
395
396 pub fn variable_full_zip_layout(
397 bits_rep: u8,
398 bits_def: u8,
399 bits_per_offset: u32,
400 value_encoding: ArrayEncoding,
401 def_meaning: &[DefinitionInterpretation],
402 num_items: u32,
403 num_visible_items: u32,
404 ) -> PageLayout {
405 Self::full_zip_layout(
406 bits_rep,
407 bits_def,
408 full_zip_layout::Details::BitsPerOffset(bits_per_offset),
409 value_encoding,
410 def_meaning,
411 num_items,
412 num_visible_items,
413 )
414 }
415
416 pub fn all_null_layout(def_meaning: &[DefinitionInterpretation]) -> PageLayout {
417 PageLayout {
418 layout: Some(Layout::AllNullLayout(AllNullLayout {
419 layers: def_meaning
420 .iter()
421 .map(|&def| Self::def_inter_to_repdef_layer(def))
422 .collect(),
423 })),
424 }
425 }
426
427 pub fn simple_all_null_layout() -> PageLayout {
428 Self::all_null_layout(&[DefinitionInterpretation::NullableItem])
429 }
430}