1pub mod pb {
6 #![allow(clippy::all)]
7 #![allow(non_upper_case_globals)]
8 #![allow(non_camel_case_types)]
9 #![allow(non_snake_case)]
10 #![allow(unused)]
11 #![allow(improper_ctypes)]
12 #![allow(clippy::upper_case_acronyms)]
13 #![allow(clippy::use_self)]
14 include!(concat!(env!("OUT_DIR"), "/lance.encodings.rs"));
15}
16
17use pb::{
18 array_encoding::ArrayEncoding as ArrayEncodingEnum,
19 buffer::BufferType,
20 full_zip_layout,
21 nullable::{AllNull, NoNull, Nullability, SomeNull},
22 page_layout::Layout,
23 AllNullLayout, ArrayEncoding, Binary, Bitpacked, BitpackedForNonNeg, Block, Dictionary,
24 FixedSizeBinary, FixedSizeList, Flat, Fsst, InlineBitpacking, MiniBlockLayout, Nullable,
25 OutOfLineBitpacking, PackedStruct, PackedStructFixedWidthMiniBlock, PageLayout, RepDefLayer,
26 Variable,
27};
28
29use crate::{
30 encodings::physical::block_compress::CompressionConfig, repdef::DefinitionInterpretation,
31};
32
33use self::pb::Constant;
34
/// Field-less type that serves as a namespace for the protobuf-building helper functions
/// defined in its `impl` block.
pub struct ProtobufUtils {}
37
38impl ProtobufUtils {
39 pub fn constant(value: Vec<u8>) -> ArrayEncoding {
40 ArrayEncoding {
41 array_encoding: Some(ArrayEncodingEnum::Constant(Constant {
42 value: value.into(),
43 })),
44 }
45 }
46
47 pub fn basic_all_null_encoding() -> ArrayEncoding {
48 ArrayEncoding {
49 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
50 nullability: Some(Nullability::AllNulls(AllNull {})),
51 }))),
52 }
53 }
54
55 pub fn basic_some_null_encoding(
56 validity: ArrayEncoding,
57 values: ArrayEncoding,
58 ) -> ArrayEncoding {
59 ArrayEncoding {
60 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
61 nullability: Some(Nullability::SomeNulls(Box::new(SomeNull {
62 validity: Some(Box::new(validity)),
63 values: Some(Box::new(values)),
64 }))),
65 }))),
66 }
67 }
68
69 pub fn basic_no_null_encoding(values: ArrayEncoding) -> ArrayEncoding {
70 ArrayEncoding {
71 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
72 nullability: Some(Nullability::NoNulls(Box::new(NoNull {
73 values: Some(Box::new(values)),
74 }))),
75 }))),
76 }
77 }
78
79 pub fn block(scheme: &str) -> ArrayEncoding {
80 ArrayEncoding {
81 array_encoding: Some(ArrayEncodingEnum::Block(Block {
82 scheme: scheme.to_string(),
83 })),
84 }
85 }
86
87 pub fn flat_encoding(
88 bits_per_value: u64,
89 buffer_index: u32,
90 compression: Option<CompressionConfig>,
91 ) -> ArrayEncoding {
92 ArrayEncoding {
93 array_encoding: Some(ArrayEncodingEnum::Flat(Flat {
94 bits_per_value,
95 buffer: Some(pb::Buffer {
96 buffer_index,
97 buffer_type: BufferType::Page as i32,
98 }),
99 compression: compression.map(|compression_config| pb::Compression {
100 scheme: compression_config.scheme.to_string(),
101 level: compression_config.level,
102 }),
103 })),
104 }
105 }
106
107 pub fn fsl_encoding(dimension: u64, items: ArrayEncoding, has_validity: bool) -> ArrayEncoding {
108 ArrayEncoding {
109 array_encoding: Some(ArrayEncodingEnum::FixedSizeList(Box::new(FixedSizeList {
110 dimension: dimension.try_into().unwrap(),
111 items: Some(Box::new(items)),
112 has_validity,
113 }))),
114 }
115 }
116
117 pub fn bitpacked_encoding(
118 compressed_bits_per_value: u64,
119 uncompressed_bits_per_value: u64,
120 buffer_index: u32,
121 signed: bool,
122 ) -> ArrayEncoding {
123 ArrayEncoding {
124 array_encoding: Some(ArrayEncodingEnum::Bitpacked(Bitpacked {
125 compressed_bits_per_value,
126 buffer: Some(pb::Buffer {
127 buffer_index,
128 buffer_type: BufferType::Page as i32,
129 }),
130 uncompressed_bits_per_value,
131 signed,
132 })),
133 }
134 }
135
136 pub fn bitpacked_for_non_neg_encoding(
137 compressed_bits_per_value: u64,
138 uncompressed_bits_per_value: u64,
139 buffer_index: u32,
140 ) -> ArrayEncoding {
141 ArrayEncoding {
142 array_encoding: Some(ArrayEncodingEnum::BitpackedForNonNeg(BitpackedForNonNeg {
143 compressed_bits_per_value,
144 buffer: Some(pb::Buffer {
145 buffer_index,
146 buffer_type: BufferType::Page as i32,
147 }),
148 uncompressed_bits_per_value,
149 })),
150 }
151 }
152 pub fn inline_bitpacking(uncompressed_bits_per_value: u64) -> ArrayEncoding {
153 ArrayEncoding {
154 array_encoding: Some(ArrayEncodingEnum::InlineBitpacking(InlineBitpacking {
155 uncompressed_bits_per_value,
156 })),
157 }
158 }
159 pub fn out_of_line_bitpacking(
160 uncompressed_bits_per_value: u64,
161 compressed_bits_per_value: u64,
162 ) -> ArrayEncoding {
163 ArrayEncoding {
164 array_encoding: Some(ArrayEncodingEnum::OutOfLineBitpacking(
165 OutOfLineBitpacking {
166 uncompressed_bits_per_value,
167 compressed_bits_per_value,
168 },
169 )),
170 }
171 }
172
173 pub fn variable(bits_per_offset: u8) -> ArrayEncoding {
174 ArrayEncoding {
175 array_encoding: Some(ArrayEncodingEnum::Variable(Variable {
176 bits_per_offset: bits_per_offset as u32,
177 })),
178 }
179 }
180
181 pub fn fsst(data: ArrayEncoding, symbol_table: Vec<u8>) -> ArrayEncoding {
185 ArrayEncoding {
186 array_encoding: Some(ArrayEncodingEnum::Fsst(Box::new(Fsst {
187 binary: Some(Box::new(data)),
188 symbol_table: symbol_table.into(),
189 }))),
190 }
191 }
192
193 pub fn packed_struct(
194 child_encodings: Vec<ArrayEncoding>,
195 packed_buffer_index: u32,
196 ) -> ArrayEncoding {
197 ArrayEncoding {
198 array_encoding: Some(ArrayEncodingEnum::PackedStruct(PackedStruct {
199 inner: child_encodings,
200 buffer: Some(pb::Buffer {
201 buffer_index: packed_buffer_index,
202 buffer_type: BufferType::Page as i32,
203 }),
204 })),
205 }
206 }
207
208 pub fn packed_struct_fixed_width_mini_block(
209 data: ArrayEncoding,
210 bits_per_values: Vec<u32>,
211 ) -> ArrayEncoding {
212 ArrayEncoding {
213 array_encoding: Some(ArrayEncodingEnum::PackedStructFixedWidthMiniBlock(
214 Box::new(PackedStructFixedWidthMiniBlock {
215 flat: Some(Box::new(data)),
216 bits_per_values,
217 }),
218 )),
219 }
220 }
221
222 pub fn binary(
223 indices_encoding: ArrayEncoding,
224 bytes_encoding: ArrayEncoding,
225 null_adjustment: u64,
226 ) -> ArrayEncoding {
227 ArrayEncoding {
228 array_encoding: Some(ArrayEncodingEnum::Binary(Box::new(Binary {
229 bytes: Some(Box::new(bytes_encoding)),
230 indices: Some(Box::new(indices_encoding)),
231 null_adjustment,
232 }))),
233 }
234 }
235
236 pub fn dict_encoding(
237 indices: ArrayEncoding,
238 items: ArrayEncoding,
239 num_items: u32,
240 ) -> ArrayEncoding {
241 ArrayEncoding {
242 array_encoding: Some(ArrayEncodingEnum::Dictionary(Box::new(Dictionary {
243 indices: Some(Box::new(indices)),
244 items: Some(Box::new(items)),
245 num_dictionary_items: num_items,
246 }))),
247 }
248 }
249
250 pub fn fixed_size_binary(data: ArrayEncoding, byte_width: u32) -> ArrayEncoding {
251 ArrayEncoding {
252 array_encoding: Some(ArrayEncodingEnum::FixedSizeBinary(Box::new(
253 FixedSizeBinary {
254 bytes: Some(Box::new(data)),
255 byte_width,
256 },
257 ))),
258 }
259 }
260
261 fn def_inter_to_repdef_layer(def: DefinitionInterpretation) -> i32 {
262 match def {
263 DefinitionInterpretation::AllValidItem => RepDefLayer::RepdefAllValidItem as i32,
264 DefinitionInterpretation::AllValidList => RepDefLayer::RepdefAllValidList as i32,
265 DefinitionInterpretation::NullableItem => RepDefLayer::RepdefNullableItem as i32,
266 DefinitionInterpretation::NullableList => RepDefLayer::RepdefNullableList as i32,
267 DefinitionInterpretation::EmptyableList => RepDefLayer::RepdefEmptyableList as i32,
268 DefinitionInterpretation::NullableAndEmptyableList => {
269 RepDefLayer::RepdefNullAndEmptyList as i32
270 }
271 }
272 }
273
274 pub fn repdef_layer_to_def_interp(layer: i32) -> DefinitionInterpretation {
275 let layer = RepDefLayer::try_from(layer).unwrap();
276 match layer {
277 RepDefLayer::RepdefAllValidItem => DefinitionInterpretation::AllValidItem,
278 RepDefLayer::RepdefAllValidList => DefinitionInterpretation::AllValidList,
279 RepDefLayer::RepdefNullableItem => DefinitionInterpretation::NullableItem,
280 RepDefLayer::RepdefNullableList => DefinitionInterpretation::NullableList,
281 RepDefLayer::RepdefEmptyableList => DefinitionInterpretation::EmptyableList,
282 RepDefLayer::RepdefNullAndEmptyList => {
283 DefinitionInterpretation::NullableAndEmptyableList
284 }
285 RepDefLayer::RepdefUnspecified => panic!("Unspecified repdef layer"),
286 }
287 }
288
289 #[allow(clippy::too_many_arguments)]
290 pub fn miniblock_layout(
291 rep_encoding: Option<ArrayEncoding>,
292 def_encoding: Option<ArrayEncoding>,
293 value_encoding: ArrayEncoding,
294 repetition_index_depth: u32,
295 num_buffers: u64,
296 dictionary_encoding: Option<(ArrayEncoding, u64)>,
297 def_meaning: &[DefinitionInterpretation],
298 num_items: u64,
299 ) -> PageLayout {
300 assert!(!def_meaning.is_empty());
301 let (dictionary, num_dictionary_items) = dictionary_encoding
302 .map(|(d, i)| (Some(d), i))
303 .unwrap_or((None, 0));
304 PageLayout {
305 layout: Some(Layout::MiniBlockLayout(MiniBlockLayout {
306 def_compression: def_encoding,
307 rep_compression: rep_encoding,
308 value_compression: Some(value_encoding),
309 repetition_index_depth,
310 num_buffers,
311 dictionary,
312 num_dictionary_items,
313 layers: def_meaning
314 .iter()
315 .map(|&def| Self::def_inter_to_repdef_layer(def))
316 .collect(),
317 num_items,
318 })),
319 }
320 }
321
322 fn full_zip_layout(
323 bits_rep: u8,
324 bits_def: u8,
325 details: full_zip_layout::Details,
326 value_encoding: ArrayEncoding,
327 def_meaning: &[DefinitionInterpretation],
328 num_items: u32,
329 num_visible_items: u32,
330 ) -> PageLayout {
331 PageLayout {
332 layout: Some(Layout::FullZipLayout(pb::FullZipLayout {
333 bits_rep: bits_rep as u32,
334 bits_def: bits_def as u32,
335 details: Some(details),
336 value_compression: Some(value_encoding),
337 num_items,
338 num_visible_items,
339 layers: def_meaning
340 .iter()
341 .map(|&def| Self::def_inter_to_repdef_layer(def))
342 .collect(),
343 })),
344 }
345 }
346
347 pub fn fixed_full_zip_layout(
348 bits_rep: u8,
349 bits_def: u8,
350 bits_per_value: u32,
351 value_encoding: ArrayEncoding,
352 def_meaning: &[DefinitionInterpretation],
353 num_items: u32,
354 num_visible_items: u32,
355 ) -> PageLayout {
356 Self::full_zip_layout(
357 bits_rep,
358 bits_def,
359 full_zip_layout::Details::BitsPerValue(bits_per_value),
360 value_encoding,
361 def_meaning,
362 num_items,
363 num_visible_items,
364 )
365 }
366
367 pub fn variable_full_zip_layout(
368 bits_rep: u8,
369 bits_def: u8,
370 bits_per_offset: u32,
371 value_encoding: ArrayEncoding,
372 def_meaning: &[DefinitionInterpretation],
373 num_items: u32,
374 num_visible_items: u32,
375 ) -> PageLayout {
376 Self::full_zip_layout(
377 bits_rep,
378 bits_def,
379 full_zip_layout::Details::BitsPerOffset(bits_per_offset),
380 value_encoding,
381 def_meaning,
382 num_items,
383 num_visible_items,
384 )
385 }
386
387 pub fn all_null_layout(def_meaning: &[DefinitionInterpretation]) -> PageLayout {
388 PageLayout {
389 layout: Some(Layout::AllNullLayout(AllNullLayout {
390 layers: def_meaning
391 .iter()
392 .map(|&def| Self::def_inter_to_repdef_layer(def))
393 .collect(),
394 })),
395 }
396 }
397
398 pub fn simple_all_null_layout() -> PageLayout {
399 Self::all_null_layout(&[DefinitionInterpretation::NullableItem])
400 }
401}