1pub mod pb {
6 #![allow(clippy::all)]
7 #![allow(non_upper_case_globals)]
8 #![allow(non_camel_case_types)]
9 #![allow(non_snake_case)]
10 #![allow(unused)]
11 #![allow(improper_ctypes)]
12 #![allow(clippy::upper_case_acronyms)]
13 #![allow(clippy::use_self)]
14 include!(concat!(env!("OUT_DIR"), "/lance.encodings.rs"));
15}
16
17use pb::{
18 array_encoding::ArrayEncoding as ArrayEncodingEnum,
19 buffer::BufferType,
20 full_zip_layout,
21 nullable::{AllNull, NoNull, Nullability, SomeNull},
22 page_layout::Layout,
23 AllNullLayout, ArrayEncoding, Binary, Bitpacked, BitpackedForNonNeg, Block, Dictionary,
24 FixedSizeBinary, FixedSizeList, Flat, Fsst, InlineBitpacking, MiniBlockLayout, Nullable,
25 OutOfLineBitpacking, PackedStruct, PackedStructFixedWidthMiniBlock, PageLayout, RepDefLayer,
26 Variable,
27};
28
29use crate::{encodings::physical::block::CompressionConfig, repdef::DefinitionInterpretation};
30
31use self::pb::Constant;
32
/// Namespace type for helper constructors that build `pb` encoding and layout messages.
///
/// The struct has no fields; the helpers in its `impl` block are associated
/// functions that do not take `self`.
pub struct ProtobufUtils {}
35
36impl ProtobufUtils {
37 pub fn constant(value: Vec<u8>) -> ArrayEncoding {
38 ArrayEncoding {
39 array_encoding: Some(ArrayEncodingEnum::Constant(Constant {
40 value: value.into(),
41 })),
42 }
43 }
44
45 pub fn basic_all_null_encoding() -> ArrayEncoding {
46 ArrayEncoding {
47 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
48 nullability: Some(Nullability::AllNulls(AllNull {})),
49 }))),
50 }
51 }
52
53 pub fn basic_some_null_encoding(
54 validity: ArrayEncoding,
55 values: ArrayEncoding,
56 ) -> ArrayEncoding {
57 ArrayEncoding {
58 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
59 nullability: Some(Nullability::SomeNulls(Box::new(SomeNull {
60 validity: Some(Box::new(validity)),
61 values: Some(Box::new(values)),
62 }))),
63 }))),
64 }
65 }
66
67 pub fn basic_no_null_encoding(values: ArrayEncoding) -> ArrayEncoding {
68 ArrayEncoding {
69 array_encoding: Some(ArrayEncodingEnum::Nullable(Box::new(Nullable {
70 nullability: Some(Nullability::NoNulls(Box::new(NoNull {
71 values: Some(Box::new(values)),
72 }))),
73 }))),
74 }
75 }
76
77 pub fn block(scheme: &str) -> ArrayEncoding {
78 ArrayEncoding {
79 array_encoding: Some(ArrayEncodingEnum::Block(Block {
80 scheme: scheme.to_string(),
81 })),
82 }
83 }
84
85 pub fn flat_encoding(
86 bits_per_value: u64,
87 buffer_index: u32,
88 compression: Option<CompressionConfig>,
89 ) -> ArrayEncoding {
90 ArrayEncoding {
91 array_encoding: Some(ArrayEncodingEnum::Flat(Flat {
92 bits_per_value,
93 buffer: Some(pb::Buffer {
94 buffer_index,
95 buffer_type: BufferType::Page as i32,
96 }),
97 compression: compression.map(|compression_config| pb::Compression {
98 scheme: compression_config.scheme.to_string(),
99 level: compression_config.level,
100 }),
101 })),
102 }
103 }
104
105 pub fn fsl_encoding(dimension: u64, items: ArrayEncoding, has_validity: bool) -> ArrayEncoding {
106 ArrayEncoding {
107 array_encoding: Some(ArrayEncodingEnum::FixedSizeList(Box::new(FixedSizeList {
108 dimension: dimension.try_into().unwrap(),
109 items: Some(Box::new(items)),
110 has_validity,
111 }))),
112 }
113 }
114
115 pub fn bitpacked_encoding(
116 compressed_bits_per_value: u64,
117 uncompressed_bits_per_value: u64,
118 buffer_index: u32,
119 signed: bool,
120 ) -> ArrayEncoding {
121 ArrayEncoding {
122 array_encoding: Some(ArrayEncodingEnum::Bitpacked(Bitpacked {
123 compressed_bits_per_value,
124 buffer: Some(pb::Buffer {
125 buffer_index,
126 buffer_type: BufferType::Page as i32,
127 }),
128 uncompressed_bits_per_value,
129 signed,
130 })),
131 }
132 }
133
134 pub fn bitpacked_for_non_neg_encoding(
135 compressed_bits_per_value: u64,
136 uncompressed_bits_per_value: u64,
137 buffer_index: u32,
138 ) -> ArrayEncoding {
139 ArrayEncoding {
140 array_encoding: Some(ArrayEncodingEnum::BitpackedForNonNeg(BitpackedForNonNeg {
141 compressed_bits_per_value,
142 buffer: Some(pb::Buffer {
143 buffer_index,
144 buffer_type: BufferType::Page as i32,
145 }),
146 uncompressed_bits_per_value,
147 })),
148 }
149 }
150 pub fn inline_bitpacking(uncompressed_bits_per_value: u64) -> ArrayEncoding {
151 ArrayEncoding {
152 array_encoding: Some(ArrayEncodingEnum::InlineBitpacking(InlineBitpacking {
153 uncompressed_bits_per_value,
154 })),
155 }
156 }
157 pub fn out_of_line_bitpacking(
158 uncompressed_bits_per_value: u64,
159 compressed_bits_per_value: u64,
160 ) -> ArrayEncoding {
161 ArrayEncoding {
162 array_encoding: Some(ArrayEncodingEnum::OutOfLineBitpacking(
163 OutOfLineBitpacking {
164 uncompressed_bits_per_value,
165 compressed_bits_per_value,
166 },
167 )),
168 }
169 }
170
171 pub fn variable(bits_per_offset: u8) -> ArrayEncoding {
172 ArrayEncoding {
173 array_encoding: Some(ArrayEncodingEnum::Variable(Variable {
174 bits_per_offset: bits_per_offset as u32,
175 })),
176 }
177 }
178
179 pub fn fsst(data: ArrayEncoding, symbol_table: Vec<u8>) -> ArrayEncoding {
183 ArrayEncoding {
184 array_encoding: Some(ArrayEncodingEnum::Fsst(Box::new(Fsst {
185 binary: Some(Box::new(data)),
186 symbol_table: symbol_table.into(),
187 }))),
188 }
189 }
190
191 pub fn packed_struct(
192 child_encodings: Vec<ArrayEncoding>,
193 packed_buffer_index: u32,
194 ) -> ArrayEncoding {
195 ArrayEncoding {
196 array_encoding: Some(ArrayEncodingEnum::PackedStruct(PackedStruct {
197 inner: child_encodings,
198 buffer: Some(pb::Buffer {
199 buffer_index: packed_buffer_index,
200 buffer_type: BufferType::Page as i32,
201 }),
202 })),
203 }
204 }
205
206 pub fn packed_struct_fixed_width_mini_block(
207 data: ArrayEncoding,
208 bits_per_values: Vec<u32>,
209 ) -> ArrayEncoding {
210 ArrayEncoding {
211 array_encoding: Some(ArrayEncodingEnum::PackedStructFixedWidthMiniBlock(
212 Box::new(PackedStructFixedWidthMiniBlock {
213 flat: Some(Box::new(data)),
214 bits_per_values,
215 }),
216 )),
217 }
218 }
219
220 pub fn binary(
221 indices_encoding: ArrayEncoding,
222 bytes_encoding: ArrayEncoding,
223 null_adjustment: u64,
224 ) -> ArrayEncoding {
225 ArrayEncoding {
226 array_encoding: Some(ArrayEncodingEnum::Binary(Box::new(Binary {
227 bytes: Some(Box::new(bytes_encoding)),
228 indices: Some(Box::new(indices_encoding)),
229 null_adjustment,
230 }))),
231 }
232 }
233
234 pub fn dict_encoding(
235 indices: ArrayEncoding,
236 items: ArrayEncoding,
237 num_items: u32,
238 ) -> ArrayEncoding {
239 ArrayEncoding {
240 array_encoding: Some(ArrayEncodingEnum::Dictionary(Box::new(Dictionary {
241 indices: Some(Box::new(indices)),
242 items: Some(Box::new(items)),
243 num_dictionary_items: num_items,
244 }))),
245 }
246 }
247
248 pub fn fixed_size_binary(data: ArrayEncoding, byte_width: u32) -> ArrayEncoding {
249 ArrayEncoding {
250 array_encoding: Some(ArrayEncodingEnum::FixedSizeBinary(Box::new(
251 FixedSizeBinary {
252 bytes: Some(Box::new(data)),
253 byte_width,
254 },
255 ))),
256 }
257 }
258
259 fn def_inter_to_repdef_layer(def: DefinitionInterpretation) -> i32 {
260 match def {
261 DefinitionInterpretation::AllValidItem => RepDefLayer::RepdefAllValidItem as i32,
262 DefinitionInterpretation::AllValidList => RepDefLayer::RepdefAllValidList as i32,
263 DefinitionInterpretation::NullableItem => RepDefLayer::RepdefNullableItem as i32,
264 DefinitionInterpretation::NullableList => RepDefLayer::RepdefNullableList as i32,
265 DefinitionInterpretation::EmptyableList => RepDefLayer::RepdefEmptyableList as i32,
266 DefinitionInterpretation::NullableAndEmptyableList => {
267 RepDefLayer::RepdefNullAndEmptyList as i32
268 }
269 }
270 }
271
272 pub fn repdef_layer_to_def_interp(layer: i32) -> DefinitionInterpretation {
273 let layer = RepDefLayer::try_from(layer).unwrap();
274 match layer {
275 RepDefLayer::RepdefAllValidItem => DefinitionInterpretation::AllValidItem,
276 RepDefLayer::RepdefAllValidList => DefinitionInterpretation::AllValidList,
277 RepDefLayer::RepdefNullableItem => DefinitionInterpretation::NullableItem,
278 RepDefLayer::RepdefNullableList => DefinitionInterpretation::NullableList,
279 RepDefLayer::RepdefEmptyableList => DefinitionInterpretation::EmptyableList,
280 RepDefLayer::RepdefNullAndEmptyList => {
281 DefinitionInterpretation::NullableAndEmptyableList
282 }
283 RepDefLayer::RepdefUnspecified => panic!("Unspecified repdef layer"),
284 }
285 }
286
287 #[allow(clippy::too_many_arguments)]
288 pub fn miniblock_layout(
289 rep_encoding: Option<ArrayEncoding>,
290 def_encoding: Option<ArrayEncoding>,
291 value_encoding: ArrayEncoding,
292 repetition_index_depth: u32,
293 num_buffers: u64,
294 dictionary_encoding: Option<(ArrayEncoding, u64)>,
295 def_meaning: &[DefinitionInterpretation],
296 num_items: u64,
297 ) -> PageLayout {
298 assert!(!def_meaning.is_empty());
299 let (dictionary, num_dictionary_items) = dictionary_encoding
300 .map(|(d, i)| (Some(d), i))
301 .unwrap_or((None, 0));
302 PageLayout {
303 layout: Some(Layout::MiniBlockLayout(MiniBlockLayout {
304 def_compression: def_encoding,
305 rep_compression: rep_encoding,
306 value_compression: Some(value_encoding),
307 repetition_index_depth,
308 num_buffers,
309 dictionary,
310 num_dictionary_items,
311 layers: def_meaning
312 .iter()
313 .map(|&def| Self::def_inter_to_repdef_layer(def))
314 .collect(),
315 num_items,
316 })),
317 }
318 }
319
320 fn full_zip_layout(
321 bits_rep: u8,
322 bits_def: u8,
323 details: full_zip_layout::Details,
324 value_encoding: ArrayEncoding,
325 def_meaning: &[DefinitionInterpretation],
326 num_items: u32,
327 num_visible_items: u32,
328 ) -> PageLayout {
329 PageLayout {
330 layout: Some(Layout::FullZipLayout(pb::FullZipLayout {
331 bits_rep: bits_rep as u32,
332 bits_def: bits_def as u32,
333 details: Some(details),
334 value_compression: Some(value_encoding),
335 num_items,
336 num_visible_items,
337 layers: def_meaning
338 .iter()
339 .map(|&def| Self::def_inter_to_repdef_layer(def))
340 .collect(),
341 })),
342 }
343 }
344
345 pub fn fixed_full_zip_layout(
346 bits_rep: u8,
347 bits_def: u8,
348 bits_per_value: u32,
349 value_encoding: ArrayEncoding,
350 def_meaning: &[DefinitionInterpretation],
351 num_items: u32,
352 num_visible_items: u32,
353 ) -> PageLayout {
354 Self::full_zip_layout(
355 bits_rep,
356 bits_def,
357 full_zip_layout::Details::BitsPerValue(bits_per_value),
358 value_encoding,
359 def_meaning,
360 num_items,
361 num_visible_items,
362 )
363 }
364
365 pub fn variable_full_zip_layout(
366 bits_rep: u8,
367 bits_def: u8,
368 bits_per_offset: u32,
369 value_encoding: ArrayEncoding,
370 def_meaning: &[DefinitionInterpretation],
371 num_items: u32,
372 num_visible_items: u32,
373 ) -> PageLayout {
374 Self::full_zip_layout(
375 bits_rep,
376 bits_def,
377 full_zip_layout::Details::BitsPerOffset(bits_per_offset),
378 value_encoding,
379 def_meaning,
380 num_items,
381 num_visible_items,
382 )
383 }
384
385 pub fn all_null_layout(def_meaning: &[DefinitionInterpretation]) -> PageLayout {
386 PageLayout {
387 layout: Some(Layout::AllNullLayout(AllNullLayout {
388 layers: def_meaning
389 .iter()
390 .map(|&def| Self::def_inter_to_repdef_layer(def))
391 .collect(),
392 })),
393 }
394 }
395
396 pub fn simple_all_null_layout() -> PageLayout {
397 Self::all_null_layout(&[DefinitionInterpretation::NullableItem])
398 }
399}