1pub mod pb {
8 #![allow(clippy::all)]
9 #![allow(non_upper_case_globals)]
10 #![allow(non_camel_case_types)]
11 #![allow(non_snake_case)]
12 #![allow(unused)]
13 #![allow(improper_ctypes)]
14 #![allow(clippy::upper_case_acronyms)]
15 #![allow(clippy::use_self)]
16 include!(concat!(env!("OUT_DIR"), "/lance.encodings.rs"));
17}
18
19pub mod pb21 {
24 #![allow(clippy::all)]
25 #![allow(non_upper_case_globals)]
26 #![allow(non_camel_case_types)]
27 #![allow(non_snake_case)]
28 #![allow(unused)]
29 #![allow(improper_ctypes)]
30 #![allow(clippy::upper_case_acronyms)]
31 #![allow(clippy::use_self)]
32 include!(concat!(env!("OUT_DIR"), "/lance.encodings21.rs"));
33}
34
35use pb::{
36 array_encoding::ArrayEncoding as ArrayEncodingEnum,
37 buffer::BufferType,
38 nullable::{AllNull, NoNull, Nullability, SomeNull},
39 ArrayEncoding, Binary, Bitpacked, BitpackedForNonNeg, Block, Dictionary, FixedSizeBinary,
40 FixedSizeList, Flat, Fsst, InlineBitpacking, Nullable, OutOfLineBitpacking, PackedStruct,
41 PackedStructFixedWidthMiniBlock, Rle, Variable,
42};
43
44use crate::{
45 encodings::physical::block::CompressionConfig,
46 format::pb21::{compressive_encoding::Compression, CompressiveEncoding},
47 repdef::DefinitionInterpretation,
48};
49
50use self::pb::Constant;
51use lance_core::Result;
52
/// Field-less type used as a namespace: its associated functions build
/// [`pb::ArrayEncoding`] messages for the individual encoding variants.
pub struct ProtobufUtils {}
55
56impl ProtobufUtils {
57 pub fn constant(value: Vec<u8>) -> ArrayEncoding {
58 ArrayEncoding {
59 array_encoding: Some(ArrayEncodingEnum::Constant(Constant {
60 value: value.into(),
61 })),
62 }
63 }
64
65 pub fn basic_all_null_encoding() -> ArrayEncoding {
66 ArrayEncoding {
67 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
68 nullability: Some(Nullability::AllNulls(AllNull {})),
69 }))),
70 }
71 }
72
73 pub fn basic_some_null_encoding(
74 validity: ArrayEncoding,
75 values: ArrayEncoding,
76 ) -> ArrayEncoding {
77 ArrayEncoding {
78 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
79 nullability: Some(Nullability::SomeNulls(Box::new(SomeNull {
80 validity: Some(Box::new(validity)),
81 values: Some(Box::new(values)),
82 }))),
83 }))),
84 }
85 }
86
87 pub fn basic_no_null_encoding(values: ArrayEncoding) -> ArrayEncoding {
88 ArrayEncoding {
89 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
90 nullability: Some(Nullability::NoNulls(Box::new(NoNull {
91 values: Some(Box::new(values)),
92 }))),
93 }))),
94 }
95 }
96
97 pub fn block(scheme: &str) -> ArrayEncoding {
98 ArrayEncoding {
99 array_encoding: Some(ArrayEncodingEnum::Block(Block {
100 scheme: scheme.to_string(),
101 })),
102 }
103 }
104
105 pub fn flat_encoding(
106 bits_per_value: u64,
107 buffer_index: u32,
108 compression: Option<CompressionConfig>,
109 ) -> ArrayEncoding {
110 ArrayEncoding {
111 array_encoding: Some(ArrayEncodingEnum::Flat(Flat {
112 bits_per_value,
113 buffer: Some(pb::Buffer {
114 buffer_index,
115 buffer_type: BufferType::Page as i32,
116 }),
117 compression: compression.map(|compression_config| pb::Compression {
118 scheme: compression_config.scheme.to_string(),
119 level: compression_config.level,
120 }),
121 })),
122 }
123 }
124
125 pub fn fsl_encoding(dimension: u64, items: ArrayEncoding, has_validity: bool) -> ArrayEncoding {
126 ArrayEncoding {
127 array_encoding: Some(ArrayEncodingEnum::FixedSizeList(Box::new(FixedSizeList {
128 dimension: dimension.try_into().unwrap(),
129 items: Some(Box::new(items)),
130 has_validity,
131 }))),
132 }
133 }
134
135 pub fn bitpacked_encoding(
136 compressed_bits_per_value: u64,
137 uncompressed_bits_per_value: u64,
138 buffer_index: u32,
139 signed: bool,
140 ) -> ArrayEncoding {
141 ArrayEncoding {
142 array_encoding: Some(ArrayEncodingEnum::Bitpacked(Bitpacked {
143 compressed_bits_per_value,
144 buffer: Some(pb::Buffer {
145 buffer_index,
146 buffer_type: BufferType::Page as i32,
147 }),
148 uncompressed_bits_per_value,
149 signed,
150 })),
151 }
152 }
153
154 pub fn bitpacked_for_non_neg_encoding(
155 compressed_bits_per_value: u64,
156 uncompressed_bits_per_value: u64,
157 buffer_index: u32,
158 ) -> ArrayEncoding {
159 ArrayEncoding {
160 array_encoding: Some(ArrayEncodingEnum::BitpackedForNonNeg(BitpackedForNonNeg {
161 compressed_bits_per_value,
162 buffer: Some(pb::Buffer {
163 buffer_index,
164 buffer_type: BufferType::Page as i32,
165 }),
166 uncompressed_bits_per_value,
167 })),
168 }
169 }
170 pub fn inline_bitpacking(uncompressed_bits_per_value: u64) -> ArrayEncoding {
171 ArrayEncoding {
172 array_encoding: Some(ArrayEncodingEnum::InlineBitpacking(InlineBitpacking {
173 uncompressed_bits_per_value,
174 })),
175 }
176 }
177 pub fn out_of_line_bitpacking(
178 uncompressed_bits_per_value: u64,
179 compressed_bits_per_value: u64,
180 ) -> ArrayEncoding {
181 ArrayEncoding {
182 array_encoding: Some(ArrayEncodingEnum::OutOfLineBitpacking(
183 OutOfLineBitpacking {
184 uncompressed_bits_per_value,
185 compressed_bits_per_value,
186 },
187 )),
188 }
189 }
190
191 pub fn variable(bits_per_offset: u8) -> ArrayEncoding {
192 ArrayEncoding {
193 array_encoding: Some(ArrayEncodingEnum::Variable(Variable {
194 bits_per_offset: bits_per_offset as u32,
195 })),
196 }
197 }
198
199 pub fn fsst(data: ArrayEncoding, symbol_table: Vec<u8>) -> ArrayEncoding {
203 ArrayEncoding {
204 array_encoding: Some(ArrayEncodingEnum::Fsst(Box::new(Fsst {
205 binary: Some(Box::new(data)),
206 symbol_table: symbol_table.into(),
207 }))),
208 }
209 }
210
211 pub fn rle(bits_per_value: u64) -> ArrayEncoding {
212 ArrayEncoding {
213 array_encoding: Some(ArrayEncodingEnum::Rle(Rle { bits_per_value })),
214 }
215 }
216
217 pub fn byte_stream_split(bits_per_value: u64) -> ArrayEncoding {
218 ArrayEncoding {
219 array_encoding: Some(ArrayEncodingEnum::ByteStreamSplit(pb::ByteStreamSplit {
220 bits_per_value,
221 })),
222 }
223 }
224
225 pub fn general_mini_block(
226 inner: ArrayEncoding,
227 compression: CompressionConfig,
228 ) -> ArrayEncoding {
229 ArrayEncoding {
230 array_encoding: Some(ArrayEncodingEnum::GeneralMiniBlock(Box::new(
231 pb::GeneralMiniBlock {
232 inner: Some(Box::new(inner)),
233 compression: Some(pb::Compression {
234 scheme: compression.scheme.to_string(),
235 level: compression.level,
236 }),
237 },
238 ))),
239 }
240 }
241
242 pub fn packed_struct(
243 child_encodings: Vec<ArrayEncoding>,
244 packed_buffer_index: u32,
245 ) -> ArrayEncoding {
246 ArrayEncoding {
247 array_encoding: Some(ArrayEncodingEnum::PackedStruct(PackedStruct {
248 inner: child_encodings,
249 buffer: Some(pb::Buffer {
250 buffer_index: packed_buffer_index,
251 buffer_type: BufferType::Page as i32,
252 }),
253 })),
254 }
255 }
256
257 pub fn packed_struct_fixed_width_mini_block(
258 data: ArrayEncoding,
259 bits_per_values: Vec<u32>,
260 ) -> ArrayEncoding {
261 ArrayEncoding {
262 array_encoding: Some(ArrayEncodingEnum::PackedStructFixedWidthMiniBlock(
263 Box::new(PackedStructFixedWidthMiniBlock {
264 flat: Some(Box::new(data)),
265 bits_per_values,
266 }),
267 )),
268 }
269 }
270
271 pub fn binary(
272 indices_encoding: ArrayEncoding,
273 bytes_encoding: ArrayEncoding,
274 null_adjustment: u64,
275 ) -> ArrayEncoding {
276 ArrayEncoding {
277 array_encoding: Some(ArrayEncodingEnum::Binary(Box::new(Binary {
278 bytes: Some(Box::new(bytes_encoding)),
279 indices: Some(Box::new(indices_encoding)),
280 null_adjustment,
281 }))),
282 }
283 }
284
285 pub fn dict_encoding(
286 indices: ArrayEncoding,
287 items: ArrayEncoding,
288 num_items: u32,
289 ) -> ArrayEncoding {
290 ArrayEncoding {
291 array_encoding: Some(ArrayEncodingEnum::Dictionary(Box::new(Dictionary {
292 indices: Some(Box::new(indices)),
293 items: Some(Box::new(items)),
294 num_dictionary_items: num_items,
295 }))),
296 }
297 }
298
299 pub fn fixed_size_binary(data: ArrayEncoding, byte_width: u32) -> ArrayEncoding {
300 ArrayEncoding {
301 array_encoding: Some(ArrayEncodingEnum::FixedSizeBinary(Box::new(
302 FixedSizeBinary {
303 bytes: Some(Box::new(data)),
304 byte_width,
305 },
306 ))),
307 }
308 }
309}
310
/// Field-less type used as a namespace: its associated functions build `pb21`
/// messages (`CompressiveEncoding`, `BufferCompression` and `PageLayout`).
pub struct ProtobufUtils21 {}
312
313impl ProtobufUtils21 {
314 pub fn flat(
315 bits_per_value: u64,
316 values_compression: Option<pb21::BufferCompression>,
317 ) -> CompressiveEncoding {
318 CompressiveEncoding {
319 compression: Some(Compression::Flat(pb21::Flat {
320 bits_per_value,
321 data: values_compression,
322 })),
323 }
324 }
325
326 pub fn fsl(
327 items_per_value: u64,
328 has_validity: bool,
329 values: CompressiveEncoding,
330 ) -> CompressiveEncoding {
331 CompressiveEncoding {
332 compression: Some(Compression::FixedSizeList(Box::new(pb21::FixedSizeList {
333 items_per_value,
334 has_validity,
335 values: Some(Box::new(values)),
336 }))),
337 }
338 }
339
340 pub fn variable(
341 offsets_desc: CompressiveEncoding,
342 values_compression: Option<pb21::BufferCompression>,
343 ) -> CompressiveEncoding {
344 CompressiveEncoding {
345 compression: Some(Compression::Variable(Box::new(pb21::Variable {
346 offsets: Some(Box::new(offsets_desc)),
347 values: values_compression,
348 }))),
349 }
350 }
351
352 pub fn inline_bitpacking(
353 uncompressed_bits_per_value: u64,
354 values_compression: Option<pb21::BufferCompression>,
355 ) -> CompressiveEncoding {
356 CompressiveEncoding {
357 compression: Some(Compression::InlineBitpacking(pb21::InlineBitpacking {
358 uncompressed_bits_per_value,
359 values: values_compression,
360 })),
361 }
362 }
363
364 pub fn out_of_line_bitpacking(
365 uncompressed_bits_per_value: u64,
366 values: CompressiveEncoding,
367 ) -> CompressiveEncoding {
368 CompressiveEncoding {
369 compression: Some(Compression::OutOfLineBitpacking(Box::new(
370 pb21::OutOfLineBitpacking {
371 uncompressed_bits_per_value,
372 values: Some(Box::new(values)),
373 },
374 ))),
375 }
376 }
377
378 pub fn buffer_compression(compression: CompressionConfig) -> Result<pb21::BufferCompression> {
379 Ok(pb21::BufferCompression {
380 scheme: pb21::CompressionScheme::try_from(compression.scheme)? as i32,
381 level: compression.level,
382 })
383 }
384
385 pub fn wrapped(
386 compression: CompressionConfig,
387 values: CompressiveEncoding,
388 ) -> Result<CompressiveEncoding> {
389 Ok(CompressiveEncoding {
390 compression: Some(Compression::General(Box::new(pb21::General {
391 compression: Some(Self::buffer_compression(compression)?),
392 values: Some(Box::new(values)),
393 }))),
394 })
395 }
396
397 pub fn rle(
398 values: CompressiveEncoding,
399 run_lengths: CompressiveEncoding,
400 ) -> CompressiveEncoding {
401 CompressiveEncoding {
402 compression: Some(Compression::Rle(Box::new(pb21::Rle {
403 values: Some(Box::new(values)),
404 run_lengths: Some(Box::new(run_lengths)),
405 }))),
406 }
407 }
408
409 pub fn byte_stream_split(values: CompressiveEncoding) -> CompressiveEncoding {
410 CompressiveEncoding {
411 compression: Some(Compression::ByteStreamSplit(Box::new(
412 pb21::ByteStreamSplit {
413 values: Some(Box::new(values)),
414 },
415 ))),
416 }
417 }
418
419 pub fn fsst(data: CompressiveEncoding, symbol_table: Vec<u8>) -> CompressiveEncoding {
420 CompressiveEncoding {
421 compression: Some(Compression::Fsst(Box::new(pb21::Fsst {
422 symbol_table: symbol_table.into(),
423 values: Some(Box::new(data)),
424 }))),
425 }
426 }
427
428 pub fn packed_struct(
429 values: CompressiveEncoding,
430 bits_per_values: Vec<u64>,
431 ) -> CompressiveEncoding {
432 CompressiveEncoding {
433 compression: Some(Compression::PackedStruct(Box::new(pb21::PackedStruct {
434 values: Some(Box::new(values)),
435 bits_per_value: bits_per_values,
436 }))),
437 }
438 }
439
440 fn def_inter_to_repdef_layer(def: DefinitionInterpretation) -> i32 {
441 match def {
442 DefinitionInterpretation::AllValidItem => pb21::RepDefLayer::RepdefAllValidItem as i32,
443 DefinitionInterpretation::AllValidList => pb21::RepDefLayer::RepdefAllValidList as i32,
444 DefinitionInterpretation::NullableItem => pb21::RepDefLayer::RepdefNullableItem as i32,
445 DefinitionInterpretation::NullableList => pb21::RepDefLayer::RepdefNullableList as i32,
446 DefinitionInterpretation::EmptyableList => {
447 pb21::RepDefLayer::RepdefEmptyableList as i32
448 }
449 DefinitionInterpretation::NullableAndEmptyableList => {
450 pb21::RepDefLayer::RepdefNullAndEmptyList as i32
451 }
452 }
453 }
454
455 pub fn repdef_layer_to_def_interp(layer: i32) -> DefinitionInterpretation {
456 let layer = pb21::RepDefLayer::try_from(layer).unwrap();
457 match layer {
458 pb21::RepDefLayer::RepdefAllValidItem => DefinitionInterpretation::AllValidItem,
459 pb21::RepDefLayer::RepdefAllValidList => DefinitionInterpretation::AllValidList,
460 pb21::RepDefLayer::RepdefNullableItem => DefinitionInterpretation::NullableItem,
461 pb21::RepDefLayer::RepdefNullableList => DefinitionInterpretation::NullableList,
462 pb21::RepDefLayer::RepdefEmptyableList => DefinitionInterpretation::EmptyableList,
463 pb21::RepDefLayer::RepdefNullAndEmptyList => {
464 DefinitionInterpretation::NullableAndEmptyableList
465 }
466 pb21::RepDefLayer::RepdefUnspecified => panic!("Unspecified repdef layer"),
467 }
468 }
469
470 #[allow(clippy::too_many_arguments)]
471 pub fn miniblock_layout(
472 rep_encoding: Option<CompressiveEncoding>,
473 def_encoding: Option<CompressiveEncoding>,
474 value_encoding: CompressiveEncoding,
475 repetition_index_depth: u32,
476 num_buffers: u64,
477 dictionary_encoding: Option<(CompressiveEncoding, u64)>,
478 def_meaning: &[DefinitionInterpretation],
479 num_items: u64,
480 ) -> pb21::PageLayout {
481 assert!(!def_meaning.is_empty());
482 let (dictionary, num_dictionary_items) = dictionary_encoding
483 .map(|(d, i)| (Some(d), i))
484 .unwrap_or((None, 0));
485 pb21::PageLayout {
486 layout: Some(pb21::page_layout::Layout::MiniBlockLayout(
487 pb21::MiniBlockLayout {
488 def_compression: def_encoding,
489 rep_compression: rep_encoding,
490 value_compression: Some(value_encoding),
491 repetition_index_depth,
492 num_buffers,
493 dictionary,
494 num_dictionary_items,
495 layers: def_meaning
496 .iter()
497 .map(|&def| Self::def_inter_to_repdef_layer(def))
498 .collect(),
499 num_items,
500 },
501 )),
502 }
503 }
504
505 fn full_zip_layout(
506 bits_rep: u8,
507 bits_def: u8,
508 details: pb21::full_zip_layout::Details,
509 value_encoding: CompressiveEncoding,
510 def_meaning: &[DefinitionInterpretation],
511 num_items: u32,
512 num_visible_items: u32,
513 ) -> pb21::PageLayout {
514 pb21::PageLayout {
515 layout: Some(pb21::page_layout::Layout::FullZipLayout(
516 pb21::FullZipLayout {
517 bits_rep: bits_rep as u32,
518 bits_def: bits_def as u32,
519 details: Some(details),
520 value_compression: Some(value_encoding),
521 num_items,
522 num_visible_items,
523 layers: def_meaning
524 .iter()
525 .map(|&def| Self::def_inter_to_repdef_layer(def))
526 .collect(),
527 },
528 )),
529 }
530 }
531
532 pub fn fixed_full_zip_layout(
533 bits_rep: u8,
534 bits_def: u8,
535 bits_per_value: u32,
536 value_encoding: CompressiveEncoding,
537 def_meaning: &[DefinitionInterpretation],
538 num_items: u32,
539 num_visible_items: u32,
540 ) -> pb21::PageLayout {
541 Self::full_zip_layout(
542 bits_rep,
543 bits_def,
544 pb21::full_zip_layout::Details::BitsPerValue(bits_per_value),
545 value_encoding,
546 def_meaning,
547 num_items,
548 num_visible_items,
549 )
550 }
551
552 pub fn variable_full_zip_layout(
553 bits_rep: u8,
554 bits_def: u8,
555 bits_per_offset: u32,
556 value_encoding: CompressiveEncoding,
557 def_meaning: &[DefinitionInterpretation],
558 num_items: u32,
559 num_visible_items: u32,
560 ) -> pb21::PageLayout {
561 Self::full_zip_layout(
562 bits_rep,
563 bits_def,
564 pb21::full_zip_layout::Details::BitsPerOffset(bits_per_offset),
565 value_encoding,
566 def_meaning,
567 num_items,
568 num_visible_items,
569 )
570 }
571
572 pub fn all_null_layout(def_meaning: &[DefinitionInterpretation]) -> pb21::PageLayout {
573 pb21::PageLayout {
574 layout: Some(pb21::page_layout::Layout::AllNullLayout(
575 pb21::AllNullLayout {
576 layers: def_meaning
577 .iter()
578 .map(|&def| Self::def_inter_to_repdef_layer(def))
579 .collect(),
580 },
581 )),
582 }
583 }
584
585 pub fn simple_all_null_layout() -> pb21::PageLayout {
586 Self::all_null_layout(&[DefinitionInterpretation::NullableItem])
587 }
588}