1pub mod pb {
8 #![allow(clippy::all)]
9 #![allow(non_upper_case_globals)]
10 #![allow(non_camel_case_types)]
11 #![allow(non_snake_case)]
12 #![allow(unused)]
13 #![allow(improper_ctypes)]
14 #![allow(clippy::upper_case_acronyms)]
15 #![allow(clippy::use_self)]
16 include!(concat!(env!("OUT_DIR"), "/lance.encodings.rs"));
17}
18
19pub mod pb21 {
24 #![allow(clippy::all)]
25 #![allow(non_upper_case_globals)]
26 #![allow(non_camel_case_types)]
27 #![allow(non_snake_case)]
28 #![allow(unused)]
29 #![allow(improper_ctypes)]
30 #![allow(clippy::upper_case_acronyms)]
31 #![allow(clippy::use_self)]
32 include!(concat!(env!("OUT_DIR"), "/lance.encodings21.rs"));
33}
34
35use pb::{
36 array_encoding::ArrayEncoding as ArrayEncodingEnum,
37 buffer::BufferType,
38 nullable::{AllNull, NoNull, Nullability, SomeNull},
39 ArrayEncoding, Binary, Bitpacked, BitpackedForNonNeg, Block, Dictionary, FixedSizeBinary,
40 FixedSizeList, Flat, Fsst, InlineBitpacking, Nullable, OutOfLineBitpacking, PackedStruct,
41 PackedStructFixedWidthMiniBlock, Rle, Variable,
42};
43
44use crate::{
45 encodings::physical::block::CompressionConfig,
46 format::pb21::{compressive_encoding::Compression, CompressiveEncoding},
47 repdef::DefinitionInterpretation,
48};
49
50use self::pb::Constant;
51use lance_core::Result;
52
53pub struct ProtobufUtils {}
55
56impl ProtobufUtils {
57 pub fn constant(value: Vec<u8>) -> ArrayEncoding {
58 ArrayEncoding {
59 array_encoding: Some(ArrayEncodingEnum::Constant(Constant {
60 value: value.into(),
61 })),
62 }
63 }
64
65 pub fn basic_all_null_encoding() -> ArrayEncoding {
66 ArrayEncoding {
67 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
68 nullability: Some(Nullability::AllNulls(AllNull {})),
69 }))),
70 }
71 }
72
73 pub fn basic_some_null_encoding(
74 validity: ArrayEncoding,
75 values: ArrayEncoding,
76 ) -> ArrayEncoding {
77 ArrayEncoding {
78 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
79 nullability: Some(Nullability::SomeNulls(Box::new(SomeNull {
80 validity: Some(Box::new(validity)),
81 values: Some(Box::new(values)),
82 }))),
83 }))),
84 }
85 }
86
87 pub fn basic_no_null_encoding(values: ArrayEncoding) -> ArrayEncoding {
88 ArrayEncoding {
89 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
90 nullability: Some(Nullability::NoNulls(Box::new(NoNull {
91 values: Some(Box::new(values)),
92 }))),
93 }))),
94 }
95 }
96
97 pub fn block(scheme: &str) -> ArrayEncoding {
98 ArrayEncoding {
99 array_encoding: Some(ArrayEncodingEnum::Block(Block {
100 scheme: scheme.to_string(),
101 })),
102 }
103 }
104
105 pub fn flat_encoding(
106 bits_per_value: u64,
107 buffer_index: u32,
108 compression: Option<CompressionConfig>,
109 ) -> ArrayEncoding {
110 ArrayEncoding {
111 array_encoding: Some(ArrayEncodingEnum::Flat(Flat {
112 bits_per_value,
113 buffer: Some(pb::Buffer {
114 buffer_index,
115 buffer_type: BufferType::Page as i32,
116 }),
117 compression: compression.map(|compression_config| pb::Compression {
118 scheme: compression_config.scheme.to_string(),
119 level: compression_config.level,
120 }),
121 })),
122 }
123 }
124
125 pub fn fsl_encoding(dimension: u64, items: ArrayEncoding, has_validity: bool) -> ArrayEncoding {
126 ArrayEncoding {
127 array_encoding: Some(ArrayEncodingEnum::FixedSizeList(Box::new(FixedSizeList {
128 dimension: dimension.try_into().unwrap(),
129 items: Some(Box::new(items)),
130 has_validity,
131 }))),
132 }
133 }
134
135 pub fn bitpacked_encoding(
136 compressed_bits_per_value: u64,
137 uncompressed_bits_per_value: u64,
138 buffer_index: u32,
139 signed: bool,
140 ) -> ArrayEncoding {
141 ArrayEncoding {
142 array_encoding: Some(ArrayEncodingEnum::Bitpacked(Bitpacked {
143 compressed_bits_per_value,
144 buffer: Some(pb::Buffer {
145 buffer_index,
146 buffer_type: BufferType::Page as i32,
147 }),
148 uncompressed_bits_per_value,
149 signed,
150 })),
151 }
152 }
153
154 pub fn bitpacked_for_non_neg_encoding(
155 compressed_bits_per_value: u64,
156 uncompressed_bits_per_value: u64,
157 buffer_index: u32,
158 ) -> ArrayEncoding {
159 ArrayEncoding {
160 array_encoding: Some(ArrayEncodingEnum::BitpackedForNonNeg(BitpackedForNonNeg {
161 compressed_bits_per_value,
162 buffer: Some(pb::Buffer {
163 buffer_index,
164 buffer_type: BufferType::Page as i32,
165 }),
166 uncompressed_bits_per_value,
167 })),
168 }
169 }
170 pub fn inline_bitpacking(uncompressed_bits_per_value: u64) -> ArrayEncoding {
171 ArrayEncoding {
172 array_encoding: Some(ArrayEncodingEnum::InlineBitpacking(InlineBitpacking {
173 uncompressed_bits_per_value,
174 })),
175 }
176 }
177 pub fn out_of_line_bitpacking(
178 uncompressed_bits_per_value: u64,
179 compressed_bits_per_value: u64,
180 ) -> ArrayEncoding {
181 ArrayEncoding {
182 array_encoding: Some(ArrayEncodingEnum::OutOfLineBitpacking(
183 OutOfLineBitpacking {
184 uncompressed_bits_per_value,
185 compressed_bits_per_value,
186 },
187 )),
188 }
189 }
190
191 pub fn variable(bits_per_offset: u8) -> ArrayEncoding {
192 ArrayEncoding {
193 array_encoding: Some(ArrayEncodingEnum::Variable(Variable {
194 bits_per_offset: bits_per_offset as u32,
195 })),
196 }
197 }
198
199 pub fn fsst(data: ArrayEncoding, symbol_table: Vec<u8>) -> ArrayEncoding {
203 ArrayEncoding {
204 array_encoding: Some(ArrayEncodingEnum::Fsst(Box::new(Fsst {
205 binary: Some(Box::new(data)),
206 symbol_table: symbol_table.into(),
207 }))),
208 }
209 }
210
211 pub fn rle(bits_per_value: u64) -> ArrayEncoding {
212 ArrayEncoding {
213 array_encoding: Some(ArrayEncodingEnum::Rle(Rle { bits_per_value })),
214 }
215 }
216
217 pub fn byte_stream_split(bits_per_value: u64) -> ArrayEncoding {
218 ArrayEncoding {
219 array_encoding: Some(ArrayEncodingEnum::ByteStreamSplit(pb::ByteStreamSplit {
220 bits_per_value,
221 })),
222 }
223 }
224
225 pub fn general_mini_block(
226 inner: ArrayEncoding,
227 compression: CompressionConfig,
228 ) -> ArrayEncoding {
229 ArrayEncoding {
230 array_encoding: Some(ArrayEncodingEnum::GeneralMiniBlock(Box::new(
231 pb::GeneralMiniBlock {
232 inner: Some(Box::new(inner)),
233 compression: Some(pb::Compression {
234 scheme: compression.scheme.to_string(),
235 level: compression.level,
236 }),
237 },
238 ))),
239 }
240 }
241
242 pub fn packed_struct(
243 child_encodings: Vec<ArrayEncoding>,
244 packed_buffer_index: u32,
245 ) -> ArrayEncoding {
246 ArrayEncoding {
247 array_encoding: Some(ArrayEncodingEnum::PackedStruct(PackedStruct {
248 inner: child_encodings,
249 buffer: Some(pb::Buffer {
250 buffer_index: packed_buffer_index,
251 buffer_type: BufferType::Page as i32,
252 }),
253 })),
254 }
255 }
256
257 pub fn packed_struct_fixed_width_mini_block(
258 data: ArrayEncoding,
259 bits_per_values: Vec<u32>,
260 ) -> ArrayEncoding {
261 ArrayEncoding {
262 array_encoding: Some(ArrayEncodingEnum::PackedStructFixedWidthMiniBlock(
263 Box::new(PackedStructFixedWidthMiniBlock {
264 flat: Some(Box::new(data)),
265 bits_per_values,
266 }),
267 )),
268 }
269 }
270
271 pub fn binary(
272 indices_encoding: ArrayEncoding,
273 bytes_encoding: ArrayEncoding,
274 null_adjustment: u64,
275 ) -> ArrayEncoding {
276 ArrayEncoding {
277 array_encoding: Some(ArrayEncodingEnum::Binary(Box::new(Binary {
278 bytes: Some(Box::new(bytes_encoding)),
279 indices: Some(Box::new(indices_encoding)),
280 null_adjustment,
281 }))),
282 }
283 }
284
285 pub fn dict_encoding(
286 indices: ArrayEncoding,
287 items: ArrayEncoding,
288 num_items: u32,
289 ) -> ArrayEncoding {
290 ArrayEncoding {
291 array_encoding: Some(ArrayEncodingEnum::Dictionary(Box::new(Dictionary {
292 indices: Some(Box::new(indices)),
293 items: Some(Box::new(items)),
294 num_dictionary_items: num_items,
295 }))),
296 }
297 }
298
299 pub fn fixed_size_binary(data: ArrayEncoding, byte_width: u32) -> ArrayEncoding {
300 ArrayEncoding {
301 array_encoding: Some(ArrayEncodingEnum::FixedSizeBinary(Box::new(
302 FixedSizeBinary {
303 bytes: Some(Box::new(data)),
304 byte_width,
305 },
306 ))),
307 }
308 }
309}
310
311pub struct ProtobufUtils21 {}
312
313impl ProtobufUtils21 {
314 pub fn flat(
315 bits_per_value: u64,
316 values_compression: Option<pb21::BufferCompression>,
317 ) -> CompressiveEncoding {
318 CompressiveEncoding {
319 compression: Some(Compression::Flat(pb21::Flat {
320 bits_per_value,
321 data: values_compression,
322 })),
323 }
324 }
325
326 pub fn constant(value: Option<bytes::Bytes>) -> CompressiveEncoding {
327 CompressiveEncoding {
328 compression: Some(Compression::Constant(pb21::Constant { value })),
329 }
330 }
331
332 pub fn fsl(
333 items_per_value: u64,
334 has_validity: bool,
335 values: CompressiveEncoding,
336 ) -> CompressiveEncoding {
337 CompressiveEncoding {
338 compression: Some(Compression::FixedSizeList(Box::new(pb21::FixedSizeList {
339 items_per_value,
340 has_validity,
341 values: Some(Box::new(values)),
342 }))),
343 }
344 }
345
346 pub fn variable(
347 offsets_desc: CompressiveEncoding,
348 values_compression: Option<pb21::BufferCompression>,
349 ) -> CompressiveEncoding {
350 CompressiveEncoding {
351 compression: Some(Compression::Variable(Box::new(pb21::Variable {
352 offsets: Some(Box::new(offsets_desc)),
353 values: values_compression,
354 }))),
355 }
356 }
357
358 pub fn inline_bitpacking(
359 uncompressed_bits_per_value: u64,
360 values_compression: Option<pb21::BufferCompression>,
361 ) -> CompressiveEncoding {
362 CompressiveEncoding {
363 compression: Some(Compression::InlineBitpacking(pb21::InlineBitpacking {
364 uncompressed_bits_per_value,
365 values: values_compression,
366 })),
367 }
368 }
369
370 pub fn out_of_line_bitpacking(
371 uncompressed_bits_per_value: u64,
372 values: CompressiveEncoding,
373 ) -> CompressiveEncoding {
374 CompressiveEncoding {
375 compression: Some(Compression::OutOfLineBitpacking(Box::new(
376 pb21::OutOfLineBitpacking {
377 uncompressed_bits_per_value,
378 values: Some(Box::new(values)),
379 },
380 ))),
381 }
382 }
383
384 pub fn buffer_compression(compression: CompressionConfig) -> Result<pb21::BufferCompression> {
385 Ok(pb21::BufferCompression {
386 scheme: pb21::CompressionScheme::try_from(compression.scheme)? as i32,
387 level: compression.level,
388 })
389 }
390
391 pub fn wrapped(
392 compression: CompressionConfig,
393 values: CompressiveEncoding,
394 ) -> Result<CompressiveEncoding> {
395 Ok(CompressiveEncoding {
396 compression: Some(Compression::General(Box::new(pb21::General {
397 compression: Some(Self::buffer_compression(compression)?),
398 values: Some(Box::new(values)),
399 }))),
400 })
401 }
402
403 pub fn rle(
404 values: CompressiveEncoding,
405 run_lengths: CompressiveEncoding,
406 ) -> CompressiveEncoding {
407 CompressiveEncoding {
408 compression: Some(Compression::Rle(Box::new(pb21::Rle {
409 values: Some(Box::new(values)),
410 run_lengths: Some(Box::new(run_lengths)),
411 }))),
412 }
413 }
414
415 pub fn byte_stream_split(values: CompressiveEncoding) -> CompressiveEncoding {
416 CompressiveEncoding {
417 compression: Some(Compression::ByteStreamSplit(Box::new(
418 pb21::ByteStreamSplit {
419 values: Some(Box::new(values)),
420 },
421 ))),
422 }
423 }
424
425 pub fn fsst(data: CompressiveEncoding, symbol_table: Vec<u8>) -> CompressiveEncoding {
426 CompressiveEncoding {
427 compression: Some(Compression::Fsst(Box::new(pb21::Fsst {
428 symbol_table: symbol_table.into(),
429 values: Some(Box::new(data)),
430 }))),
431 }
432 }
433
434 pub fn packed_struct(
435 values: CompressiveEncoding,
436 bits_per_values: Vec<u64>,
437 ) -> CompressiveEncoding {
438 CompressiveEncoding {
439 compression: Some(Compression::PackedStruct(Box::new(pb21::PackedStruct {
440 values: Some(Box::new(values)),
441 bits_per_value: bits_per_values,
442 }))),
443 }
444 }
445
446 fn def_inter_to_repdef_layer(def: DefinitionInterpretation) -> i32 {
447 match def {
448 DefinitionInterpretation::AllValidItem => pb21::RepDefLayer::RepdefAllValidItem as i32,
449 DefinitionInterpretation::AllValidList => pb21::RepDefLayer::RepdefAllValidList as i32,
450 DefinitionInterpretation::NullableItem => pb21::RepDefLayer::RepdefNullableItem as i32,
451 DefinitionInterpretation::NullableList => pb21::RepDefLayer::RepdefNullableList as i32,
452 DefinitionInterpretation::EmptyableList => {
453 pb21::RepDefLayer::RepdefEmptyableList as i32
454 }
455 DefinitionInterpretation::NullableAndEmptyableList => {
456 pb21::RepDefLayer::RepdefNullAndEmptyList as i32
457 }
458 }
459 }
460
461 pub fn repdef_layer_to_def_interp(layer: i32) -> DefinitionInterpretation {
462 let layer = pb21::RepDefLayer::try_from(layer).unwrap();
463 match layer {
464 pb21::RepDefLayer::RepdefAllValidItem => DefinitionInterpretation::AllValidItem,
465 pb21::RepDefLayer::RepdefAllValidList => DefinitionInterpretation::AllValidList,
466 pb21::RepDefLayer::RepdefNullableItem => DefinitionInterpretation::NullableItem,
467 pb21::RepDefLayer::RepdefNullableList => DefinitionInterpretation::NullableList,
468 pb21::RepDefLayer::RepdefEmptyableList => DefinitionInterpretation::EmptyableList,
469 pb21::RepDefLayer::RepdefNullAndEmptyList => {
470 DefinitionInterpretation::NullableAndEmptyableList
471 }
472 pb21::RepDefLayer::RepdefUnspecified => panic!("Unspecified repdef layer"),
473 }
474 }
475
476 #[allow(clippy::too_many_arguments)]
477 pub fn miniblock_layout(
478 rep_encoding: Option<CompressiveEncoding>,
479 def_encoding: Option<CompressiveEncoding>,
480 value_encoding: CompressiveEncoding,
481 repetition_index_depth: u32,
482 num_buffers: u64,
483 dictionary_encoding: Option<(CompressiveEncoding, u64)>,
484 def_meaning: &[DefinitionInterpretation],
485 num_items: u64,
486 ) -> pb21::PageLayout {
487 assert!(!def_meaning.is_empty());
488 let (dictionary, num_dictionary_items) = dictionary_encoding
489 .map(|(d, i)| (Some(d), i))
490 .unwrap_or((None, 0));
491 pb21::PageLayout {
492 layout: Some(pb21::page_layout::Layout::MiniBlockLayout(
493 pb21::MiniBlockLayout {
494 def_compression: def_encoding,
495 rep_compression: rep_encoding,
496 value_compression: Some(value_encoding),
497 repetition_index_depth,
498 num_buffers,
499 dictionary,
500 num_dictionary_items,
501 layers: def_meaning
502 .iter()
503 .map(|&def| Self::def_inter_to_repdef_layer(def))
504 .collect(),
505 num_items,
506 },
507 )),
508 }
509 }
510
511 fn full_zip_layout(
512 bits_rep: u8,
513 bits_def: u8,
514 details: pb21::full_zip_layout::Details,
515 value_encoding: CompressiveEncoding,
516 def_meaning: &[DefinitionInterpretation],
517 num_items: u32,
518 num_visible_items: u32,
519 ) -> pb21::PageLayout {
520 pb21::PageLayout {
521 layout: Some(pb21::page_layout::Layout::FullZipLayout(
522 pb21::FullZipLayout {
523 bits_rep: bits_rep as u32,
524 bits_def: bits_def as u32,
525 details: Some(details),
526 value_compression: Some(value_encoding),
527 num_items,
528 num_visible_items,
529 layers: def_meaning
530 .iter()
531 .map(|&def| Self::def_inter_to_repdef_layer(def))
532 .collect(),
533 },
534 )),
535 }
536 }
537
538 pub fn fixed_full_zip_layout(
539 bits_rep: u8,
540 bits_def: u8,
541 bits_per_value: u32,
542 value_encoding: CompressiveEncoding,
543 def_meaning: &[DefinitionInterpretation],
544 num_items: u32,
545 num_visible_items: u32,
546 ) -> pb21::PageLayout {
547 Self::full_zip_layout(
548 bits_rep,
549 bits_def,
550 pb21::full_zip_layout::Details::BitsPerValue(bits_per_value),
551 value_encoding,
552 def_meaning,
553 num_items,
554 num_visible_items,
555 )
556 }
557
558 pub fn variable_full_zip_layout(
559 bits_rep: u8,
560 bits_def: u8,
561 bits_per_offset: u32,
562 value_encoding: CompressiveEncoding,
563 def_meaning: &[DefinitionInterpretation],
564 num_items: u32,
565 num_visible_items: u32,
566 ) -> pb21::PageLayout {
567 Self::full_zip_layout(
568 bits_rep,
569 bits_def,
570 pb21::full_zip_layout::Details::BitsPerOffset(bits_per_offset),
571 value_encoding,
572 def_meaning,
573 num_items,
574 num_visible_items,
575 )
576 }
577
578 pub fn blob_layout(
579 inner_layout: pb21::PageLayout,
580 def_meaning: &[DefinitionInterpretation],
581 ) -> pb21::PageLayout {
582 pb21::PageLayout {
583 layout: Some(pb21::page_layout::Layout::BlobLayout(Box::new(
584 pb21::BlobLayout {
585 inner_layout: Some(Box::new(inner_layout)),
586 layers: def_meaning
587 .iter()
588 .map(|&def| Self::def_inter_to_repdef_layer(def))
589 .collect(),
590 },
591 ))),
592 }
593 }
594
595 pub fn all_null_layout(def_meaning: &[DefinitionInterpretation]) -> pb21::PageLayout {
596 pb21::PageLayout {
597 layout: Some(pb21::page_layout::Layout::AllNullLayout(
598 pb21::AllNullLayout {
599 layers: def_meaning
600 .iter()
601 .map(|&def| Self::def_inter_to_repdef_layer(def))
602 .collect(),
603 },
604 )),
605 }
606 }
607
608 pub fn simple_all_null_layout() -> pb21::PageLayout {
609 Self::all_null_layout(&[DefinitionInterpretation::NullableItem])
610 }
611}