1use std::{
110 iter::{Copied, Zip},
111 sync::Arc,
112};
113
114use arrow_array::OffsetSizeTrait;
115use arrow_buffer::{
116 ArrowNativeType, BooleanBuffer, BooleanBufferBuilder, NullBuffer, OffsetBuffer, ScalarBuffer,
117};
118use lance_core::{utils::bit::log_2_ceil, Error, Result};
119use snafu::location;
120
121use crate::buffer::LanceBuffer;
122
/// Buffer of repetition or definition levels, one `u16` per level.
pub type LevelBuffer = Vec<u16>;

/// Offset added to a definition level to mark an entry as "special" while serializing.
///
/// `SerializerContext::record_offsets` writes null/empty list entries as
/// `level + SPECIAL_THRESHOLD`; later layers copy any level above this value through
/// unchanged, and `SerializerContext::normalize_specials` subtracts it again at the end.
const SPECIAL_THRESHOLD: u16 = u16::MAX / 2;
136
/// One list layer: normalized offsets plus optional per-list validity.
#[derive(Clone, Debug)]
struct OffsetDesc {
    /// Normalized offsets starting at 0; consecutive entries delimit one list.
    offsets: Arc<[i64]>,
    /// Per-list validity, or `None` when no validity buffer was supplied.
    validity: Option<BooleanBuffer>,
    /// True when at least one valid list in this layer has length zero.
    has_empty_lists: bool,
    /// Number of lists in this layer (`offsets.len() - 1`).
    num_values: usize,
    /// Number of lists that are either null or empty.
    num_specials: usize,
}
147
/// One plain validity layer.
#[derive(Clone, Debug)]
struct ValidityDesc {
    /// Validity bitmap, or `None` when the layer was added via `add_no_null`.
    validity: Option<BooleanBuffer>,
    /// Number of values covered by this layer.
    num_values: usize,
}
155
/// One fixed-size-list layer.
#[derive(Clone, Debug)]
struct FslDesc {
    /// Validity of the fixed-size lists, or `None` when none was supplied.
    validity: Option<BooleanBuffer>,
    /// Multiplier applied to the levels of this layer (see `SerializerContext::record_fsl`).
    dimension: usize,
    /// Number of fixed-size lists in this layer.
    num_values: usize,
}
165
/// A single, not yet serialized, layer of repetition/definition information.
#[derive(Clone, Debug)]
enum RawRepDef {
    /// A variable-length list layer.
    Offsets(OffsetDesc),
    /// A plain validity layer.
    Validity(ValidityDesc),
    /// A fixed-size-list layer.
    Fsl(FslDesc),
}
175
176impl RawRepDef {
177 fn has_nulls(&self) -> bool {
179 match self {
180 Self::Offsets(OffsetDesc { validity, .. }) => validity.is_some(),
181 Self::Validity(ValidityDesc { validity, .. }) => validity.is_some(),
182 Self::Fsl(FslDesc { validity, .. }) => validity.is_some(),
183 }
184 }
185
186 fn num_values(&self) -> usize {
188 match self {
189 Self::Offsets(OffsetDesc { num_values, .. }) => *num_values,
190 Self::Validity(ValidityDesc { num_values, .. }) => *num_values,
191 Self::Fsl(FslDesc { num_values, .. }) => *num_values,
192 }
193 }
194
195 fn num_specials(&self) -> usize {
197 match self {
198 Self::Offsets(OffsetDesc { num_specials, .. }) => *num_specials,
199 _ => 0,
200 }
201 }
202
203 fn max_def(&self) -> u16 {
205 match self {
206 Self::Offsets(OffsetDesc {
207 has_empty_lists,
208 validity,
209 ..
210 }) => {
211 let mut max_def = 0;
212 if *has_empty_lists {
213 max_def += 1;
214 }
215 if validity.is_some() {
216 max_def += 1;
217 }
218 max_def
219 }
220 Self::Validity(ValidityDesc { validity: None, .. }) => 0,
221 Self::Validity(ValidityDesc { .. }) => 1,
222 Self::Fsl(FslDesc { validity: None, .. }) => 0,
223 Self::Fsl(FslDesc { .. }) => 1,
224 }
225 }
226
227 fn max_rep(&self) -> u16 {
229 match self {
230 Self::Offsets(_) => 1,
231 _ => 0,
232 }
233 }
234}
235
/// Serialized repetition and definition levels together with their interpretation.
#[derive(Debug)]
pub struct SerializedRepDefs {
    /// Repetition levels, or `None` when none were produced.
    pub repetition_levels: Option<Arc<[u16]>>,
    /// Definition levels, or `None` when none were produced.
    pub definition_levels: Option<Arc<[u16]>>,
    /// Interpretation of each layer's definition levels.
    pub def_meaning: Vec<DefinitionInterpretation>,
    /// Sum of the definition levels used by the layers that precede the first list
    /// layer in `def_meaning`; `None` when there is no list layer.
    pub max_visible_level: Option<u16>,
}
258
259impl SerializedRepDefs {
260 pub fn new(
261 repetition_levels: Option<LevelBuffer>,
262 definition_levels: Option<LevelBuffer>,
263 def_meaning: Vec<DefinitionInterpretation>,
264 ) -> Self {
265 let first_list = def_meaning.iter().position(|level| level.is_list());
266 let max_visible_level = first_list.map(|first_list| {
267 def_meaning
268 .iter()
269 .map(|level| level.num_def_levels())
270 .take(first_list)
271 .sum::<u16>()
272 });
273 Self {
274 repetition_levels: repetition_levels.map(Arc::from),
275 definition_levels: definition_levels.map(Arc::from),
276 def_meaning,
277 max_visible_level,
278 }
279 }
280
281 pub fn empty(def_meaning: Vec<DefinitionInterpretation>) -> Self {
283 Self {
284 repetition_levels: None,
285 definition_levels: None,
286 def_meaning,
287 max_visible_level: None,
288 }
289 }
290
291 pub fn rep_slicer(&self) -> Option<RepDefSlicer<'_>> {
292 self.repetition_levels
293 .as_ref()
294 .map(|rep| RepDefSlicer::new(self, rep.clone()))
295 }
296
297 pub fn def_slicer(&self) -> Option<RepDefSlicer<'_>> {
298 self.definition_levels
299 .as_ref()
300 .map(|def| RepDefSlicer::new(self, def.clone()))
301 }
302}
303
/// Cursor that hands out consecutive slices of one level buffer.
#[derive(Debug)]
pub struct RepDefSlicer<'a> {
    /// The serialized levels this slicer was created from.
    repdef: &'a SerializedRepDefs,
    /// The levels being sliced; the methods address it with 2 units per level
    /// (presumably bytes of `u16` levels — confirm against `LanceBuffer`).
    to_slice: LanceBuffer,
    /// Index of the next level that has not been sliced off yet.
    current: usize,
}
317
318impl<'a> RepDefSlicer<'a> {
320 fn new(repdef: &'a SerializedRepDefs, levels: Arc<[u16]>) -> Self {
321 Self {
322 repdef,
323 to_slice: LanceBuffer::reinterpret_slice(levels),
324 current: 0,
325 }
326 }
327
328 pub fn num_levels(&self) -> usize {
329 self.to_slice.len() / 2
330 }
331
332 pub fn num_levels_remaining(&self) -> usize {
333 self.num_levels() - self.current
334 }
335
336 pub fn all_levels(&self) -> &LanceBuffer {
337 &self.to_slice
338 }
339
340 pub fn slice_rest(&mut self) -> LanceBuffer {
349 let start = self.current;
350 let remaining = self.num_levels_remaining();
351 self.current = self.num_levels();
352 self.to_slice.slice_with_length(start * 2, remaining * 2)
353 }
354
355 pub fn slice_next(&mut self, num_values: usize) -> LanceBuffer {
357 let start = self.current;
358 let Some(max_visible_level) = self.repdef.max_visible_level else {
359 self.current = start + num_values;
361 return self.to_slice.slice_with_length(start * 2, num_values * 2);
362 };
363 if let Some(def) = self.repdef.definition_levels.as_ref() {
364 let mut def_itr = def[start..].iter();
368 let mut num_taken = 0;
369 let mut num_passed = 0;
370 while num_taken < num_values {
371 let def_level = *def_itr.next().unwrap();
372 if def_level <= max_visible_level {
373 num_taken += 1;
374 }
375 num_passed += 1;
376 }
377 self.current = start + num_passed;
378 self.to_slice.slice_with_length(start * 2, num_passed * 2)
379 } else {
380 self.current = start + num_values;
382 self.to_slice.slice_with_length(start * 2, num_values * 2)
383 }
384 }
385}
386
/// Describes how the definition levels of one layer are to be interpreted.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DefinitionInterpretation {
    /// Item layer without nulls; uses no definition level.
    AllValidItem,
    /// List layer without nulls or empty lists; uses no definition level.
    AllValidList,
    /// Item layer with nulls; uses one definition level.
    NullableItem,
    /// List layer with null lists; uses one definition level.
    NullableList,
    /// List layer with empty lists; uses one definition level.
    EmptyableList,
    /// List layer with both null and empty lists; uses two definition levels.
    NullableAndEmptyableList,
}

impl DefinitionInterpretation {
    /// Number of definition levels this interpretation occupies.
    pub fn num_def_levels(&self) -> u16 {
        match self {
            Self::AllValidItem | Self::AllValidList => 0,
            Self::NullableItem | Self::NullableList | Self::EmptyableList => 1,
            Self::NullableAndEmptyableList => 2,
        }
    }

    /// Returns true when the layer cannot contain nulls.
    pub fn is_all_valid(&self) -> bool {
        match self {
            Self::AllValidItem | Self::AllValidList | Self::EmptyableList => true,
            Self::NullableItem | Self::NullableList | Self::NullableAndEmptyableList => false,
        }
    }

    /// Returns true when the layer is a list layer.
    pub fn is_list(&self) -> bool {
        match self {
            Self::AllValidItem | Self::NullableItem => false,
            Self::AllValidList
            | Self::NullableList
            | Self::EmptyableList
            | Self::NullableAndEmptyableList => true,
        }
    }
}
441
/// Working state used while turning raw layers into rep/def level buffers.
///
/// Each step reads from `rep_levels`/`def_levels`, writes into the matching spare
/// buffer and then swaps the two.
#[derive(Debug)]
struct SerializerContext {
    /// Interpretation of every layer recorded so far, in recording order.
    def_meaning: Vec<DefinitionInterpretation>,
    /// Current repetition levels (empty when no repetition is needed).
    rep_levels: LevelBuffer,
    /// Scratch buffer that the next repetition levels are written into.
    spare_rep: LevelBuffer,
    /// Current definition levels (empty when no definition is needed).
    def_levels: LevelBuffer,
    /// Scratch buffer that the next definition levels are written into.
    spare_def: LevelBuffer,
    /// Repetition level handed to the next list layer; starts at the maximum.
    current_rep: u16,
    /// Definition level handed to the next layer that needs one; starts at the maximum.
    current_def: u16,
    /// Number of levels currently in use, as set by the most recent step.
    current_len: usize,
    /// Number of special (null/empty list) entries accumulated so far.
    current_num_specials: usize,
}
466
467impl SerializerContext {
468 fn new(len: usize, num_layers: usize, max_rep: u16, max_def: u16) -> Self {
469 let def_meaning = Vec::with_capacity(num_layers);
470 Self {
471 rep_levels: if max_rep > 0 {
472 vec![0; len]
473 } else {
474 LevelBuffer::default()
475 },
476 spare_rep: if max_rep > 0 {
477 vec![0; len]
478 } else {
479 LevelBuffer::default()
480 },
481 def_levels: if max_def > 0 {
482 vec![0; len]
483 } else {
484 LevelBuffer::default()
485 },
486 spare_def: if max_def > 0 {
487 vec![0; len]
488 } else {
489 LevelBuffer::default()
490 },
491 def_meaning,
492 current_rep: max_rep,
493 current_def: max_def,
494 current_len: 0,
495 current_num_specials: 0,
496 }
497 }
498
499 fn checkout_def(&mut self, meaning: DefinitionInterpretation) -> u16 {
500 let def = self.current_def;
501 self.current_def -= meaning.num_def_levels();
502 self.def_meaning.push(meaning);
503 def
504 }
505
/// Records one list layer, expanding the current levels to one entry per list item.
///
/// Null and empty lists produce a single "special" entry whose definition level is
/// offset by `SPECIAL_THRESHOLD`; special entries written by earlier calls are copied
/// through unchanged.
fn record_offsets(&mut self, offset_desc: &OffsetDesc) {
    // Repetition level that marks the start of a list at this layer.
    let rep_level = self.current_rep;
    // Reserve definition levels according to whether the layer has nulls and/or
    // empty lists. A level of 0 stands for "not used by this layer".
    let (null_list_level, empty_list_level) =
        match (offset_desc.validity.is_some(), offset_desc.has_empty_lists) {
            (true, true) => {
                let level =
                    self.checkout_def(DefinitionInterpretation::NullableAndEmptyableList);
                (level - 1, level)
            }
            (true, false) => (self.checkout_def(DefinitionInterpretation::NullableList), 0),
            (false, true) => (
                0,
                self.checkout_def(DefinitionInterpretation::EmptyableList),
            ),
            (false, false) => {
                self.checkout_def(DefinitionInterpretation::AllValidList);
                (0, 0)
            }
        };
    self.current_rep -= 1;

    // Null lists are first merged into the definition levels like any other validity.
    if let Some(validity) = &offset_desc.validity {
        self.do_record_validity(validity, null_list_level);
    }

    let mut new_len = 0;
    assert!(self.rep_levels.len() >= (offset_desc.num_values + self.current_num_specials) - 1);
    if self.def_levels.is_empty() {
        // No definition levels at all, so every list must have at least one item.
        let mut write_itr = self.spare_rep.iter_mut();
        let mut read_iter = self.rep_levels.iter().copied();
        for w in offset_desc.offsets.windows(2) {
            let len = w[1] - w[0];
            assert!(len > 0);
            let rep = read_iter.next().unwrap();
            // Keep a non-zero repetition level that is already present; otherwise
            // the first item of the list gets this layer's level.
            let list_level = if rep == 0 { rep_level } else { rep };
            *write_itr.next().unwrap() = list_level;

            // The remaining items of the list do not start anything.
            for _ in 1..len {
                *write_itr.next().unwrap() = 0;
            }
            new_len += len as usize;
        }
        std::mem::swap(&mut self.rep_levels, &mut self.spare_rep);
    } else {
        assert!(
            self.def_levels.len() >= (offset_desc.num_values + self.current_num_specials) - 1
        );
        let mut def_write_itr = self.spare_def.iter_mut();
        let mut rep_write_itr = self.spare_rep.iter_mut();
        let mut rep_read_itr = self.rep_levels.iter().copied();
        let mut def_read_itr = self.def_levels.iter().copied();
        let specials_to_pass = self.current_num_specials;
        let mut specials_passed = 0;

        for w in offset_desc.offsets.windows(2) {
            let mut def = def_read_itr.next().unwrap();
            // Existing special entries are copied as-is and do not consume a list.
            while def > SPECIAL_THRESHOLD {
                *def_write_itr.next().unwrap() = def;
                *rep_write_itr.next().unwrap() = rep_read_itr.next().unwrap();
                def = def_read_itr.next().unwrap();
                new_len += 1;
                specials_passed += 1;
            }

            let len = w[1] - w[0];
            let rep = rep_read_itr.next().unwrap();

            let list_level = if rep == 0 { rep_level } else { rep };

            if def == 0 && len > 0 {
                // Valid, non-empty list: one entry per item.
                *def_write_itr.next().unwrap() = 0;
                *rep_write_itr.next().unwrap() = list_level;

                for _ in 1..len {
                    *def_write_itr.next().unwrap() = 0;
                    *rep_write_itr.next().unwrap() = 0;
                }

                new_len += len as usize;
            } else if def == 0 {
                // Valid but empty list: a single special entry.
                *def_write_itr.next().unwrap() = empty_list_level + SPECIAL_THRESHOLD;
                *rep_write_itr.next().unwrap() = list_level;
                new_len += 1;
            } else {
                // A non-zero level is already recorded for this slot: keep it, as a
                // single special entry.
                *def_write_itr.next().unwrap() = def + SPECIAL_THRESHOLD;
                *rep_write_itr.next().unwrap() = list_level;
                new_len += 1;
            }
        }

        // Copy any special entries that come after the last list.
        while specials_passed < specials_to_pass {
            *def_write_itr.next().unwrap() = def_read_itr.next().unwrap();
            *rep_write_itr.next().unwrap() = rep_read_itr.next().unwrap();
            new_len += 1;
            specials_passed += 1;
        }
        std::mem::swap(&mut self.def_levels, &mut self.spare_def);
        std::mem::swap(&mut self.rep_levels, &mut self.spare_rep);
    }

    self.current_len = new_len;
    self.current_num_specials += offset_desc.num_specials;
}
623
/// Merges one validity bitmap into the definition levels.
///
/// A slot whose level is still 0 and whose validity bit is unset is set to
/// `null_level`; every other slot keeps its level. Entries above
/// `SPECIAL_THRESHOLD` are copied through without consuming a validity bit.
fn do_record_validity(&mut self, validity: &BooleanBuffer, null_level: u16) {
    assert!(self.def_levels.len() >= validity.len() + self.current_num_specials);
    debug_assert!(
        self.current_len == 0 || self.current_len == validity.len() + self.current_num_specials
    );
    self.current_len = validity.len();

    let mut def_read_itr = self.def_levels.iter().copied();
    let mut def_write_itr = self.spare_def.iter_mut();

    let specials_to_pass = self.current_num_specials;
    let mut specials_passed = 0;

    for incoming_validity in validity.iter() {
        let mut def = def_read_itr.next().unwrap();
        // Skip over (and preserve) special entries in front of this slot.
        while def > SPECIAL_THRESHOLD {
            *def_write_itr.next().unwrap() = def;
            def = def_read_itr.next().unwrap();
            specials_passed += 1;
        }
        if def == 0 && !incoming_validity {
            *def_write_itr.next().unwrap() = null_level;
        } else {
            *def_write_itr.next().unwrap() = def;
        }
    }

    // Copy any special entries that come after the last validity bit.
    while specials_passed < specials_to_pass {
        *def_write_itr.next().unwrap() = def_read_itr.next().unwrap();
        specials_passed += 1;
    }

    std::mem::swap(&mut self.def_levels, &mut self.spare_def);
}
658
/// Repeats every non-special level `multiplier` times.
///
/// Special entries (definition level above `SPECIAL_THRESHOLD`) are copied once.
/// `current_len` is updated accordingly.
fn multiply_levels(&mut self, multiplier: usize) {
    let old_len = self.current_len;
    self.current_len =
        (self.current_len - self.current_num_specials) * multiplier + self.current_num_specials;

    if self.rep_levels.is_empty() && self.def_levels.is_empty() {
        // No levels are tracked at all, only the length changes.
        return;
    } else if self.rep_levels.is_empty() {
        // Only definition levels are tracked.
        assert!(self.def_levels.len() >= self.current_len);
        let mut def_read_itr = self.def_levels.iter().copied();
        let mut def_write_itr = self.spare_def.iter_mut();
        for _ in 0..old_len {
            let mut def = def_read_itr.next().unwrap();
            while def > SPECIAL_THRESHOLD {
                *def_write_itr.next().unwrap() = def;
                def = def_read_itr.next().unwrap();
            }
            for _ in 0..multiplier {
                *def_write_itr.next().unwrap() = def;
            }
        }
    } else if self.def_levels.is_empty() {
        // Only repetition levels are tracked; specials cannot be detected here
        // because they are marked in the definition levels.
        assert!(self.rep_levels.len() >= self.current_len);
        let mut rep_read_itr = self.rep_levels.iter().copied();
        let mut rep_write_itr = self.spare_rep.iter_mut();
        for _ in 0..old_len {
            let rep = rep_read_itr.next().unwrap();
            for _ in 0..multiplier {
                *rep_write_itr.next().unwrap() = rep;
            }
        }
    } else {
        // Both kinds of levels are tracked and advanced in lock step.
        assert!(self.rep_levels.len() >= self.current_len);
        assert!(self.def_levels.len() >= self.current_len);
        let mut rep_read_itr = self.rep_levels.iter().copied();
        let mut def_read_itr = self.def_levels.iter().copied();
        let mut rep_write_itr = self.spare_rep.iter_mut();
        let mut def_write_itr = self.spare_def.iter_mut();
        for _ in 0..old_len {
            let mut def = def_read_itr.next().unwrap();
            while def > SPECIAL_THRESHOLD {
                *def_write_itr.next().unwrap() = def;
                *rep_write_itr.next().unwrap() = rep_read_itr.next().unwrap();
                def = def_read_itr.next().unwrap();
            }
            let rep = rep_read_itr.next().unwrap();
            for _ in 0..multiplier {
                *def_write_itr.next().unwrap() = def;
                *rep_write_itr.next().unwrap() = rep;
            }
        }
    }
    // Both pairs are swapped unconditionally, including a pair that was not written.
    std::mem::swap(&mut self.def_levels, &mut self.spare_def);
    std::mem::swap(&mut self.rep_levels, &mut self.spare_rep);
}
718
719 fn record_validity_buf(&mut self, validity: &Option<BooleanBuffer>) {
720 if let Some(validity) = validity {
721 let def_level = self.checkout_def(DefinitionInterpretation::NullableItem);
722 self.do_record_validity(validity, def_level);
723 } else {
724 self.checkout_def(DefinitionInterpretation::AllValidItem);
725 }
726 }
727
728 fn record_validity(&mut self, validity_desc: &ValidityDesc) {
729 self.record_validity_buf(&validity_desc.validity)
730 }
731
732 fn record_fsl(&mut self, fsl_desc: &FslDesc) {
733 self.record_validity_buf(&fsl_desc.validity);
734 self.multiply_levels(fsl_desc.dimension);
735 }
736
737 fn normalize_specials(&mut self) {
738 for def in self.def_levels.iter_mut() {
739 if *def > SPECIAL_THRESHOLD {
740 *def -= SPECIAL_THRESHOLD;
741 }
742 }
743 }
744
745 fn build(mut self) -> SerializedRepDefs {
746 if self.current_len == 0 {
747 return SerializedRepDefs::new(None, None, self.def_meaning);
748 }
749
750 self.normalize_specials();
751
752 let definition_levels = if self.def_levels.is_empty() {
753 None
754 } else {
755 Some(self.def_levels)
756 };
757 let repetition_levels = if self.rep_levels.is_empty() {
758 None
759 } else {
760 Some(self.rep_levels)
761 };
762
763 let def_meaning = self.def_meaning.into_iter().rev().collect::<Vec<_>>();
765
766 SerializedRepDefs::new(repetition_levels, definition_levels, def_meaning)
767 }
768}
769
/// Collects the raw validity / offset / fixed-size-list layers of one array so that
/// they can later be serialized into repetition and definition levels.
#[derive(Clone, Default, Debug)]
pub struct RepDefBuilder {
    /// The layers in the order in which they were added.
    repdefs: Vec<RawRepDef>,
    /// Number of values the next layer is expected to describe; `None` until the
    /// first layer has been added.
    len: Option<usize>,
}
786
787impl RepDefBuilder {
788 fn check_validity_len(&mut self, incoming_len: usize) {
789 if let Some(len) = self.len {
790 assert_eq!(incoming_len, len);
791 } else {
792 self.len = Some(incoming_len);
794 }
795 }
796
/// Returns the number of layers added so far.
fn num_layers(&self) -> usize {
    self.repdefs.len()
}
800
801 pub fn is_empty(&self) -> bool {
804 self.repdefs
805 .iter()
806 .all(|r| matches!(r, RawRepDef::Validity(ValidityDesc { validity: None, .. })))
807 }
808
809 pub fn is_simple_validity(&self) -> bool {
811 self.repdefs.len() == 1 && matches!(self.repdefs[0], RawRepDef::Validity(_))
812 }
813
814 pub fn add_validity_bitmap(&mut self, validity: NullBuffer) {
816 self.check_validity_len(validity.len());
817 self.repdefs.push(RawRepDef::Validity(ValidityDesc {
818 num_values: validity.len(),
819 validity: Some(validity.into_inner()),
820 }));
821 }
822
823 pub fn add_no_null(&mut self, len: usize) {
825 self.check_validity_len(len);
826 self.repdefs.push(RawRepDef::Validity(ValidityDesc {
827 validity: None,
828 num_values: len,
829 }));
830 }
831
832 pub fn add_fsl(&mut self, validity: Option<NullBuffer>, dimension: usize, num_values: usize) {
833 if let Some(len) = self.len {
834 assert_eq!(num_values, len);
835 }
836 self.len = Some(num_values * dimension);
837 debug_assert!(validity.is_none() || validity.as_ref().unwrap().len() == num_values);
838 self.repdefs.push(RawRepDef::Fsl(FslDesc {
839 num_values,
840 validity: validity.map(|v| v.into_inner()),
841 dimension,
842 }))
843 }
844
/// Checks that `offsets` describes the expected number of lists and makes the last
/// offset the expected length of the next layer.
///
/// # Panics
///
/// Panics if an expected length is set and `offsets` does not have exactly one more
/// entry than that, or if `offsets` is empty.
fn check_offset_len(&mut self, offsets: &[i64]) {
    if let Some(len) = self.len {
        assert!(offsets.len() == len + 1);
    }
    // The last offset is the total number of items across all lists.
    self.len = Some(offsets[offsets.len() - 1] as usize);
}
851
852 fn do_add_offsets(
853 &mut self,
854 lengths: impl Iterator<Item = i64>,
855 validity: Option<NullBuffer>,
856 capacity: usize,
857 ) -> bool {
858 let mut num_specials = 0;
859 let mut has_empty_lists = false;
860 let mut has_garbage_values = false;
861 let mut last_off: i64 = 0;
862
863 let mut normalized_offsets = Vec::with_capacity(capacity);
864 normalized_offsets.push(0);
865
866 if let Some(ref validity) = validity {
867 for (len, is_valid) in lengths.zip(validity.iter()) {
868 match (is_valid, len == 0) {
869 (false, is_empty) => {
870 num_specials += 1;
871 has_garbage_values |= !is_empty;
872 }
873 (true, true) => {
874 num_specials += 1;
875 has_empty_lists = true;
876 }
877 _ => {
878 last_off += len;
879 }
880 }
881 normalized_offsets.push(last_off);
882 }
883 } else {
884 for len in lengths {
885 if len == 0 {
886 num_specials += 1;
887 has_empty_lists = true;
888 }
889 last_off += len;
890 normalized_offsets.push(last_off);
891 }
892 }
893
894 self.check_offset_len(&normalized_offsets);
895 self.repdefs.push(RawRepDef::Offsets(OffsetDesc {
896 num_values: normalized_offsets.len() - 1,
897 offsets: normalized_offsets.into(),
898 validity: validity.map(|v| v.into_inner()),
899 has_empty_lists,
900 num_specials: num_specials as usize,
901 }));
902
903 has_garbage_values
904 }
905
906 pub fn add_offsets<O: OffsetSizeTrait>(
913 &mut self,
914 offsets: OffsetBuffer<O>,
915 validity: Option<NullBuffer>,
916 ) -> bool {
917 let inner = offsets.into_inner();
918 let buffer_len = inner.len();
919
920 if O::IS_LARGE {
921 let i64_buff = ScalarBuffer::<i64>::new(inner.into_inner(), 0, buffer_len);
922 let lengths = i64_buff.windows(2).map(|off| off[1] - off[0]);
923 self.do_add_offsets(lengths, validity, buffer_len)
924 } else {
925 let i32_buff = ScalarBuffer::<i32>::new(inner.into_inner(), 0, buffer_len);
926 let lengths = i32_buff.windows(2).map(|off| (off[1] - off[0]) as i64);
927 self.do_add_offsets(lengths, validity, buffer_len)
928 }
929 }
930
931 fn concat_layers<'a>(
943 layers: impl Iterator<Item = &'a RawRepDef>,
944 num_layers: usize,
945 ) -> RawRepDef {
946 enum LayerKind {
947 Validity,
948 Fsl,
949 Offsets,
950 }
951
952 let mut collected = Vec::with_capacity(num_layers);
955 let mut has_nulls = false;
956 let mut layer_kind = LayerKind::Validity;
957 let mut total_num_specials = 0;
958 let mut all_dimension = 0;
959 let mut all_has_empty_lists = false;
960 let mut all_num_values = 0;
961 for layer in layers {
962 has_nulls |= layer.has_nulls();
963 match layer {
964 RawRepDef::Validity(_) => {
965 layer_kind = LayerKind::Validity;
966 }
967 RawRepDef::Offsets(OffsetDesc {
968 num_specials,
969 has_empty_lists,
970 ..
971 }) => {
972 all_has_empty_lists |= *has_empty_lists;
973 layer_kind = LayerKind::Offsets;
974 total_num_specials += num_specials;
975 }
976 RawRepDef::Fsl(FslDesc { dimension, .. }) => {
977 layer_kind = LayerKind::Fsl;
978 all_dimension = *dimension;
979 }
980 }
981 collected.push(layer);
982 all_num_values += layer.num_values();
983 }
984
985 if !has_nulls {
987 match layer_kind {
988 LayerKind::Validity => {
989 return RawRepDef::Validity(ValidityDesc {
990 validity: None,
991 num_values: all_num_values,
992 });
993 }
994 LayerKind::Fsl => {
995 return RawRepDef::Fsl(FslDesc {
996 validity: None,
997 num_values: all_num_values,
998 dimension: all_dimension,
999 })
1000 }
1001 LayerKind::Offsets => {}
1002 }
1003 }
1004
1005 let mut validity_builder = if has_nulls {
1007 BooleanBufferBuilder::new(all_num_values)
1008 } else {
1009 BooleanBufferBuilder::new(0)
1010 };
1011 let mut all_offsets = if matches!(layer_kind, LayerKind::Offsets) {
1012 let mut all_offsets = Vec::with_capacity(all_num_values);
1013 all_offsets.push(0);
1014 all_offsets
1015 } else {
1016 Vec::new()
1017 };
1018
1019 for layer in collected {
1020 match layer {
1021 RawRepDef::Validity(ValidityDesc {
1022 validity: Some(validity),
1023 ..
1024 }) => {
1025 validity_builder.append_buffer(validity);
1026 }
1027 RawRepDef::Validity(ValidityDesc {
1028 validity: None,
1029 num_values,
1030 }) => {
1031 validity_builder.append_n(*num_values, true);
1032 }
1033 RawRepDef::Fsl(FslDesc {
1034 validity,
1035 num_values,
1036 ..
1037 }) => {
1038 if let Some(validity) = validity {
1039 validity_builder.append_buffer(validity);
1040 } else {
1041 validity_builder.append_n(*num_values, true);
1042 }
1043 }
1044 RawRepDef::Offsets(OffsetDesc {
1045 offsets,
1046 validity: Some(validity),
1047 has_empty_lists,
1048 ..
1049 }) => {
1050 all_has_empty_lists |= has_empty_lists;
1051 validity_builder.append_buffer(validity);
1052 let last = *all_offsets.last().unwrap();
1053 all_offsets.extend(offsets.iter().skip(1).map(|off| *off + last));
1054 }
1055 RawRepDef::Offsets(OffsetDesc {
1056 offsets,
1057 validity: None,
1058 has_empty_lists,
1059 num_values,
1060 ..
1061 }) => {
1062 all_has_empty_lists |= has_empty_lists;
1063 if has_nulls {
1064 validity_builder.append_n(*num_values, true);
1065 }
1066 let last = *all_offsets.last().unwrap();
1067 all_offsets.extend(offsets.iter().skip(1).map(|off| *off + last));
1068 }
1069 }
1070 }
1071 let validity = if has_nulls {
1072 Some(validity_builder.finish())
1073 } else {
1074 None
1075 };
1076 match layer_kind {
1077 LayerKind::Fsl => RawRepDef::Fsl(FslDesc {
1078 validity,
1079 num_values: all_num_values,
1080 dimension: all_dimension,
1081 }),
1082 LayerKind::Validity => RawRepDef::Validity(ValidityDesc {
1083 validity,
1084 num_values: all_num_values,
1085 }),
1086 LayerKind::Offsets => RawRepDef::Offsets(OffsetDesc {
1087 offsets: all_offsets.into(),
1088 validity,
1089 has_empty_lists: all_has_empty_lists,
1090 num_values: all_num_values,
1091 num_specials: total_num_specials,
1092 }),
1093 }
1094 }
1095
1096 pub fn serialize(builders: Vec<Self>) -> SerializedRepDefs {
1099 assert!(!builders.is_empty());
1100 if builders.iter().all(|b| b.is_empty()) {
1101 return SerializedRepDefs::empty(
1103 builders
1104 .first()
1105 .unwrap()
1106 .repdefs
1107 .iter()
1108 .map(|_| DefinitionInterpretation::AllValidItem)
1109 .collect::<Vec<_>>(),
1110 );
1111 }
1112
1113 let num_layers = builders[0].num_layers();
1114 let combined_layers = (0..num_layers)
1115 .map(|layer_index| {
1116 Self::concat_layers(
1117 builders.iter().map(|b| &b.repdefs[layer_index]),
1118 builders.len(),
1119 )
1120 })
1121 .collect::<Vec<_>>();
1122 debug_assert!(builders
1123 .iter()
1124 .all(|b| b.num_layers() == builders[0].num_layers()));
1125
1126 let total_len = combined_layers.last().unwrap().num_values()
1127 + combined_layers
1128 .iter()
1129 .map(|l| l.num_specials())
1130 .sum::<usize>();
1131 let max_rep = combined_layers.iter().map(|l| l.max_rep()).sum::<u16>();
1132 let max_def = combined_layers.iter().map(|l| l.max_def()).sum::<u16>();
1133
1134 let mut context = SerializerContext::new(total_len, num_layers, max_rep, max_def);
1135 for layer in combined_layers.into_iter() {
1136 match layer {
1137 RawRepDef::Validity(def) => {
1138 context.record_validity(&def);
1139 }
1140 RawRepDef::Offsets(rep) => {
1141 context.record_offsets(&rep);
1142 }
1143 RawRepDef::Fsl(fsl) => {
1144 context.record_fsl(&fsl);
1145 }
1146 }
1147 }
1148 context.build()
1149 }
1150}
1151
/// Turns serialized repetition/definition levels back into validity and offsets,
/// one layer at a time.
#[derive(Debug)]
pub struct RepDefUnraveler {
    /// Remaining repetition levels; compacted by every `unravel_offsets` call.
    rep_levels: Option<LevelBuffer>,
    /// Remaining definition levels; compacted by `unravel_offsets` and `decimate`.
    def_levels: Option<LevelBuffer>,
    /// Lookup table indexed by definition level, built in `new`; compared against
    /// `current_rep_cmp` in `unravel_validity`.
    levels_to_rep: Vec<u16>,
    /// Interpretation of each layer, indexed by `current_layer`.
    def_meaning: Arc<[DefinitionInterpretation]>,
    /// Number of definition levels consumed by the layers unraveled so far.
    current_def_cmp: u16,
    /// Number of list layers unraveled so far.
    current_rep_cmp: u16,
    /// Index into `def_meaning` of the next layer to unravel.
    current_layer: usize,
    /// Number of validity bits appended for an `AllValidItem` layer.
    num_items: u64,
}
1173
1174impl RepDefUnraveler {
1175 pub fn new(
1177 rep_levels: Option<LevelBuffer>,
1178 def_levels: Option<LevelBuffer>,
1179 def_meaning: Arc<[DefinitionInterpretation]>,
1180 num_items: u64,
1181 ) -> Self {
1182 let mut levels_to_rep = Vec::with_capacity(def_meaning.len());
1183 let mut rep_counter = 0;
1184 levels_to_rep.push(0);
1186 for meaning in def_meaning.as_ref() {
1187 match meaning {
1188 DefinitionInterpretation::AllValidItem | DefinitionInterpretation::AllValidList => {
1189 }
1191 DefinitionInterpretation::NullableItem => {
1192 levels_to_rep.push(rep_counter);
1194 }
1195 DefinitionInterpretation::NullableList => {
1196 rep_counter += 1;
1197 levels_to_rep.push(rep_counter);
1198 }
1199 DefinitionInterpretation::EmptyableList => {
1200 rep_counter += 1;
1201 levels_to_rep.push(rep_counter);
1202 }
1203 DefinitionInterpretation::NullableAndEmptyableList => {
1204 rep_counter += 1;
1205 levels_to_rep.push(rep_counter);
1206 levels_to_rep.push(rep_counter);
1207 }
1208 }
1209 }
1210 Self {
1211 rep_levels,
1212 def_levels,
1213 current_def_cmp: 0,
1214 current_rep_cmp: 0,
1215 levels_to_rep,
1216 current_layer: 0,
1217 def_meaning,
1218 num_items,
1219 }
1220 }
1221
/// Returns true when the current layer cannot contain nulls.
///
/// # Panics
///
/// Panics if every layer has already been unraveled.
pub fn is_all_valid(&self) -> bool {
    self.def_meaning[self.current_layer].is_all_valid()
}
1225
1226 pub fn max_lists(&self) -> usize {
1232 debug_assert!(
1233 self.def_meaning[self.current_layer] != DefinitionInterpretation::NullableItem
1234 );
1235 self.rep_levels
1236 .as_ref()
1237 .map(|levels| levels.len())
1239 .unwrap_or(0)
1240 }
1241
1242 pub fn unravel_offsets<T: ArrowNativeType>(
1247 &mut self,
1248 offsets: &mut Vec<T>,
1249 validity: Option<&mut BooleanBufferBuilder>,
1250 ) -> Result<()> {
1251 let rep_levels = self
1252 .rep_levels
1253 .as_mut()
1254 .expect("Expected repetition level but data didn't contain repetition");
1255 let valid_level = self.current_def_cmp;
1256 let (null_level, empty_level) = match self.def_meaning[self.current_layer] {
1257 DefinitionInterpretation::NullableList => {
1258 self.current_def_cmp += 1;
1259 (valid_level + 1, 0)
1260 }
1261 DefinitionInterpretation::EmptyableList => {
1262 self.current_def_cmp += 1;
1263 (0, valid_level + 1)
1264 }
1265 DefinitionInterpretation::NullableAndEmptyableList => {
1266 self.current_def_cmp += 2;
1267 (valid_level + 1, valid_level + 2)
1268 }
1269 DefinitionInterpretation::AllValidList => (0, 0),
1270 _ => unreachable!(),
1271 };
1272 self.current_layer += 1;
1273
1274 let mut max_level = null_level.max(empty_level);
1278 let upper_null = max_level;
1281 for level in self.def_meaning[self.current_layer..].iter() {
1282 match level {
1283 DefinitionInterpretation::NullableItem => {
1284 max_level += 1;
1285 }
1286 DefinitionInterpretation::AllValidItem => {}
1287 _ => {
1288 break;
1289 }
1290 }
1291 }
1292
1293 let mut curlen: usize = offsets.last().map(|o| o.as_usize()).unwrap_or(0);
1294
1295 offsets.pop();
1303
1304 let to_offset = |val: usize| {
1305 T::from_usize(val)
1306 .ok_or_else(|| Error::invalid_input("A single batch had more than i32::MAX values and so a large container type is required", location!()))
1307 };
1308 self.current_rep_cmp += 1;
1309 if let Some(def_levels) = &mut self.def_levels {
1310 assert!(rep_levels.len() == def_levels.len());
1311 let mut push_validity: Box<dyn FnMut(bool)> = if let Some(validity) = validity {
1314 Box::new(|is_valid| validity.append(is_valid))
1315 } else {
1316 Box::new(|_| {})
1317 };
1318 let mut read_idx = 0;
1322 let mut write_idx = 0;
1323 while read_idx < rep_levels.len() {
1324 unsafe {
1327 let rep_val = *rep_levels.get_unchecked(read_idx);
1328 if rep_val != 0 {
1329 let def_val = *def_levels.get_unchecked(read_idx);
1330 *rep_levels.get_unchecked_mut(write_idx) = rep_val - 1;
1332 *def_levels.get_unchecked_mut(write_idx) = def_val;
1333 write_idx += 1;
1334
1335 if def_val == 0 {
1336 offsets.push(to_offset(curlen)?);
1338 curlen += 1;
1339 push_validity(true);
1340 } else if def_val > max_level {
1341 } else if def_val == null_level || def_val > upper_null {
1343 offsets.push(to_offset(curlen)?);
1345 push_validity(false);
1346 } else if def_val == empty_level {
1347 offsets.push(to_offset(curlen)?);
1349 push_validity(true);
1350 } else {
1351 offsets.push(to_offset(curlen)?);
1353 curlen += 1;
1354 push_validity(true);
1355 }
1356 } else {
1357 curlen += 1;
1358 }
1359 read_idx += 1;
1360 }
1361 }
1362 offsets.push(to_offset(curlen)?);
1363 rep_levels.truncate(write_idx);
1364 def_levels.truncate(write_idx);
1365 Ok(())
1366 } else {
1367 let mut read_idx = 0;
1369 let mut write_idx = 0;
1370 let old_offsets_len = offsets.len();
1371 while read_idx < rep_levels.len() {
1372 unsafe {
1374 let rep_val = *rep_levels.get_unchecked(read_idx);
1375 if rep_val != 0 {
1376 offsets.push(to_offset(curlen)?);
1378 *rep_levels.get_unchecked_mut(write_idx) = rep_val - 1;
1379 write_idx += 1;
1380 }
1381 curlen += 1;
1382 read_idx += 1;
1383 }
1384 }
1385 let num_new_lists = offsets.len() - old_offsets_len;
1386 offsets.push(to_offset(curlen)?);
1387 rep_levels.truncate(offsets.len() - 1);
1388 if let Some(validity) = validity {
1389 validity.append_n(num_new_lists, true);
1392 }
1393 Ok(())
1394 }
1395 }
1396
/// Moves past the current layer without producing validity.
///
/// In debug builds this asserts that the layer is `AllValidItem`.
pub fn skip_validity(&mut self) {
    debug_assert!(
        self.def_meaning[self.current_layer] == DefinitionInterpretation::AllValidItem
    );
    self.current_layer += 1;
}
1403
1404 pub fn unravel_validity(&mut self, validity: &mut BooleanBufferBuilder) {
1406 if self.def_meaning[self.current_layer] == DefinitionInterpretation::AllValidItem {
1407 self.current_layer += 1;
1408 validity.append_n(self.num_items as usize, true);
1409 return;
1410 }
1411
1412 self.current_layer += 1;
1413 let def_levels = &self.def_levels.as_ref().unwrap();
1414
1415 let current_def_cmp = self.current_def_cmp;
1416 self.current_def_cmp += 1;
1417
1418 for is_valid in def_levels.iter().filter_map(|&level| {
1419 if self.levels_to_rep[level as usize] <= self.current_rep_cmp {
1420 Some(level <= current_def_cmp)
1421 } else {
1422 None
1423 }
1424 }) {
1425 validity.append(is_valid);
1426 }
1427 }
1428
1429 pub fn decimate(&mut self, dimension: usize) {
1430 if self.rep_levels.is_some() {
1431 todo!("Not yet supported FSL<...List<...>>");
1443 }
1444 let Some(def_levels) = self.def_levels.as_mut() else {
1445 return;
1446 };
1447 let mut read_idx = 0;
1448 let mut write_idx = 0;
1449 while read_idx < def_levels.len() {
1450 unsafe {
1451 *def_levels.get_unchecked_mut(write_idx) = *def_levels.get_unchecked(read_idx);
1452 }
1453 write_idx += 1;
1454 read_idx += dimension;
1455 }
1456 def_levels.truncate(write_idx);
1457 }
1458}
1459
/// Drives several `RepDefUnraveler`s in order and concatenates their output into a
/// single validity buffer / offsets buffer.
#[derive(Debug)]
pub struct CompositeRepDefUnraveler {
    /// The unravelers, in the order in which their output is concatenated.
    unravelers: Vec<RepDefUnraveler>,
}
1477
1478impl CompositeRepDefUnraveler {
1479 pub fn new(unravelers: Vec<RepDefUnraveler>) -> Self {
1480 Self { unravelers }
1481 }
1482
1483 pub fn unravel_validity(&mut self, num_values: usize) -> Option<NullBuffer> {
1487 let is_all_valid = self
1488 .unravelers
1489 .iter()
1490 .all(|unraveler| unraveler.is_all_valid());
1491
1492 if is_all_valid {
1493 for unraveler in self.unravelers.iter_mut() {
1494 unraveler.skip_validity();
1495 }
1496 None
1497 } else {
1498 let mut validity = BooleanBufferBuilder::new(num_values);
1499 for unraveler in self.unravelers.iter_mut() {
1500 unraveler.unravel_validity(&mut validity);
1501 }
1502 Some(NullBuffer::new(validity.finish()))
1503 }
1504 }
1505
1506 pub fn unravel_fsl_validity(
1507 &mut self,
1508 num_values: usize,
1509 dimension: usize,
1510 ) -> Option<NullBuffer> {
1511 for unraveler in self.unravelers.iter_mut() {
1512 unraveler.decimate(dimension);
1513 }
1514 self.unravel_validity(num_values)
1515 }
1516
1517 pub fn unravel_offsets<T: ArrowNativeType>(
1519 &mut self,
1520 ) -> Result<(OffsetBuffer<T>, Option<NullBuffer>)> {
1521 let mut is_all_valid = true;
1522 let mut max_num_lists = 0;
1523 for unraveler in self.unravelers.iter() {
1524 is_all_valid &= unraveler.is_all_valid();
1525 max_num_lists += unraveler.max_lists();
1526 }
1527
1528 let mut validity = if is_all_valid {
1529 None
1530 } else {
1531 Some(BooleanBufferBuilder::new(max_num_lists))
1534 };
1535
1536 let mut offsets = Vec::with_capacity(max_num_lists + 1);
1537
1538 for unraveler in self.unravelers.iter_mut() {
1539 unraveler.unravel_offsets(&mut offsets, validity.as_mut())?;
1540 }
1541
1542 Ok((
1543 OffsetBuffer::new(ScalarBuffer::from(offsets)),
1544 validity.map(|mut v| NullBuffer::new(v.finish())),
1545 ))
1546 }
1547}
1548
1549#[derive(Debug)]
1555pub struct BinaryControlWordIterator<I: Iterator<Item = (u16, u16)>, W> {
1556 repdef: I,
1557 def_width: usize,
1558 max_rep: u16,
1559 max_visible_def: u16,
1560 rep_mask: u16,
1561 def_mask: u16,
1562 bits_rep: u8,
1563 bits_def: u8,
1564 phantom: std::marker::PhantomData<W>,
1565}
1566
1567impl<I: Iterator<Item = (u16, u16)>> BinaryControlWordIterator<I, u8> {
1568 fn append_next(&mut self, buf: &mut Vec<u8>) -> Option<ControlWordDesc> {
1569 let next = self.repdef.next()?;
1570 let control_word: u8 =
1571 (((next.0 & self.rep_mask) as u8) << self.def_width) + ((next.1 & self.def_mask) as u8);
1572 buf.push(control_word);
1573 let is_new_row = next.0 == self.max_rep;
1574 let is_visible = next.1 <= self.max_visible_def;
1575 let is_valid_item = next.1 == 0;
1576 Some(ControlWordDesc {
1577 is_new_row,
1578 is_visible,
1579 is_valid_item,
1580 })
1581 }
1582}
1583
1584impl<I: Iterator<Item = (u16, u16)>> BinaryControlWordIterator<I, u16> {
1585 fn append_next(&mut self, buf: &mut Vec<u8>) -> Option<ControlWordDesc> {
1586 let next = self.repdef.next()?;
1587 let control_word: u16 =
1588 ((next.0 & self.rep_mask) << self.def_width) + (next.1 & self.def_mask);
1589 let control_word = control_word.to_le_bytes();
1590 buf.push(control_word[0]);
1591 buf.push(control_word[1]);
1592 let is_new_row = next.0 == self.max_rep;
1593 let is_visible = next.1 <= self.max_visible_def;
1594 let is_valid_item = next.1 == 0;
1595 Some(ControlWordDesc {
1596 is_new_row,
1597 is_visible,
1598 is_valid_item,
1599 })
1600 }
1601}
1602
1603impl<I: Iterator<Item = (u16, u16)>> BinaryControlWordIterator<I, u32> {
1604 fn append_next(&mut self, buf: &mut Vec<u8>) -> Option<ControlWordDesc> {
1605 let next = self.repdef.next()?;
1606 let control_word: u32 = (((next.0 & self.rep_mask) as u32) << self.def_width)
1607 + ((next.1 & self.def_mask) as u32);
1608 let control_word = control_word.to_le_bytes();
1609 buf.push(control_word[0]);
1610 buf.push(control_word[1]);
1611 buf.push(control_word[2]);
1612 buf.push(control_word[3]);
1613 let is_new_row = next.0 == self.max_rep;
1614 let is_visible = next.1 <= self.max_visible_def;
1615 let is_valid_item = next.1 == 0;
1616 Some(ControlWordDesc {
1617 is_new_row,
1618 is_visible,
1619 is_valid_item,
1620 })
1621 }
1622}
1623
1624#[derive(Debug)]
1626pub struct UnaryControlWordIterator<I: Iterator<Item = u16>, W> {
1627 repdef: I,
1628 level_mask: u16,
1629 bits_rep: u8,
1630 bits_def: u8,
1631 max_rep: u16,
1632 phantom: std::marker::PhantomData<W>,
1633}
1634
1635impl<I: Iterator<Item = u16>> UnaryControlWordIterator<I, u8> {
1636 fn append_next(&mut self, buf: &mut Vec<u8>) -> Option<ControlWordDesc> {
1637 let next = self.repdef.next()?;
1638 buf.push((next & self.level_mask) as u8);
1639 let is_new_row = self.max_rep == 0 || next == self.max_rep;
1640 let is_valid_item = next == 0 || self.bits_def == 0;
1641 Some(ControlWordDesc {
1642 is_new_row,
1643 is_visible: true,
1646 is_valid_item,
1647 })
1648 }
1649}
1650
1651impl<I: Iterator<Item = u16>> UnaryControlWordIterator<I, u16> {
1652 fn append_next(&mut self, buf: &mut Vec<u8>) -> Option<ControlWordDesc> {
1653 let next = self.repdef.next().unwrap() & self.level_mask;
1654 let control_word = next.to_le_bytes();
1655 buf.push(control_word[0]);
1656 buf.push(control_word[1]);
1657 let is_new_row = self.max_rep == 0 || next == self.max_rep;
1658 let is_valid_item = next == 0 || self.bits_def == 0;
1659 Some(ControlWordDesc {
1660 is_new_row,
1661 is_visible: true,
1662 is_valid_item,
1663 })
1664 }
1665}
1666
1667impl<I: Iterator<Item = u16>> UnaryControlWordIterator<I, u32> {
1668 fn append_next(&mut self, buf: &mut Vec<u8>) -> Option<ControlWordDesc> {
1669 let next = self.repdef.next()?;
1670 let next = (next & self.level_mask) as u32;
1671 let control_word = next.to_le_bytes();
1672 buf.push(control_word[0]);
1673 buf.push(control_word[1]);
1674 buf.push(control_word[2]);
1675 buf.push(control_word[3]);
1676 let is_new_row = self.max_rep == 0 || next as u16 == self.max_rep;
1677 let is_valid_item = next == 0 || self.bits_def == 0;
1678 Some(ControlWordDesc {
1679 is_new_row,
1680 is_visible: true,
1681 is_valid_item,
1682 })
1683 }
1684}
1685
1686#[derive(Debug)]
1688pub struct NilaryControlWordIterator {
1689 len: usize,
1690 idx: usize,
1691}
1692
1693impl NilaryControlWordIterator {
1694 fn append_next(&mut self) -> Option<ControlWordDesc> {
1695 if self.idx == self.len {
1696 None
1697 } else {
1698 self.idx += 1;
1699 Some(ControlWordDesc {
1700 is_new_row: true,
1701 is_visible: true,
1702 is_valid_item: true,
1703 })
1704 }
1705 }
1706}
1707
/// Returns a mask with the lowest `width` bits set.
///
/// Widths of 16 or more saturate to `u16::MAX` (a plain `1 << width` would overflow
/// the shift for a 16-bit level, which is the widest a `u16` level can need).
fn get_mask(width: u16) -> u16 {
    if u32::from(width) >= u16::BITS {
        u16::MAX
    } else {
        (1 << width) - 1
    }
}
1712
1713type SpecificBinaryControlWordIterator<'a, T> = BinaryControlWordIterator<
1716 Zip<Copied<std::slice::Iter<'a, u16>>, Copied<std::slice::Iter<'a, u16>>>,
1717 T,
1718>;
1719
1720#[derive(Debug)]
1730pub enum ControlWordIterator<'a> {
1731 Binary8(SpecificBinaryControlWordIterator<'a, u8>),
1732 Binary16(SpecificBinaryControlWordIterator<'a, u16>),
1733 Binary32(SpecificBinaryControlWordIterator<'a, u32>),
1734 Unary8(UnaryControlWordIterator<Copied<std::slice::Iter<'a, u16>>, u8>),
1735 Unary16(UnaryControlWordIterator<Copied<std::slice::Iter<'a, u16>>, u16>),
1736 Unary32(UnaryControlWordIterator<Copied<std::slice::Iter<'a, u16>>, u32>),
1737 Nilary(NilaryControlWordIterator),
1738}
1739
1740#[derive(Debug)]
1742pub struct ControlWordDesc {
1743 pub is_new_row: bool,
1744 pub is_visible: bool,
1745 pub is_valid_item: bool,
1746}
1747
1748impl ControlWordIterator<'_> {
1749 pub fn append_next(&mut self, buf: &mut Vec<u8>) -> Option<ControlWordDesc> {
1753 match self {
1754 Self::Binary8(iter) => iter.append_next(buf),
1755 Self::Binary16(iter) => iter.append_next(buf),
1756 Self::Binary32(iter) => iter.append_next(buf),
1757 Self::Unary8(iter) => iter.append_next(buf),
1758 Self::Unary16(iter) => iter.append_next(buf),
1759 Self::Unary32(iter) => iter.append_next(buf),
1760 Self::Nilary(iter) => iter.append_next(),
1761 }
1762 }
1763
1764 pub fn has_repetition(&self) -> bool {
1766 match self {
1767 Self::Binary8(_) | Self::Binary16(_) | Self::Binary32(_) => true,
1768 Self::Unary8(iter) => iter.bits_rep > 0,
1769 Self::Unary16(iter) => iter.bits_rep > 0,
1770 Self::Unary32(iter) => iter.bits_rep > 0,
1771 Self::Nilary(_) => false,
1772 }
1773 }
1774
1775 pub fn bytes_per_word(&self) -> usize {
1777 match self {
1778 Self::Binary8(_) => 1,
1779 Self::Binary16(_) => 2,
1780 Self::Binary32(_) => 4,
1781 Self::Unary8(_) => 1,
1782 Self::Unary16(_) => 2,
1783 Self::Unary32(_) => 4,
1784 Self::Nilary(_) => 0,
1785 }
1786 }
1787
1788 pub fn bits_rep(&self) -> u8 {
1790 match self {
1791 Self::Binary8(iter) => iter.bits_rep,
1792 Self::Binary16(iter) => iter.bits_rep,
1793 Self::Binary32(iter) => iter.bits_rep,
1794 Self::Unary8(iter) => iter.bits_rep,
1795 Self::Unary16(iter) => iter.bits_rep,
1796 Self::Unary32(iter) => iter.bits_rep,
1797 Self::Nilary(_) => 0,
1798 }
1799 }
1800
1801 pub fn bits_def(&self) -> u8 {
1803 match self {
1804 Self::Binary8(iter) => iter.bits_def,
1805 Self::Binary16(iter) => iter.bits_def,
1806 Self::Binary32(iter) => iter.bits_def,
1807 Self::Unary8(iter) => iter.bits_def,
1808 Self::Unary16(iter) => iter.bits_def,
1809 Self::Unary32(iter) => iter.bits_def,
1810 Self::Nilary(_) => 0,
1811 }
1812 }
1813}
1814
1815pub fn build_control_word_iterator<'a>(
1819 rep: Option<&'a [u16]>,
1820 max_rep: u16,
1821 def: Option<&'a [u16]>,
1822 max_def: u16,
1823 max_visible_def: u16,
1824 len: usize,
1825) -> ControlWordIterator<'a> {
1826 let rep_width = if max_rep == 0 {
1827 0
1828 } else {
1829 log_2_ceil(max_rep as u32) as u16
1830 };
1831 let rep_mask = if max_rep == 0 { 0 } else { get_mask(rep_width) };
1832 let def_width = if max_def == 0 {
1833 0
1834 } else {
1835 log_2_ceil(max_def as u32) as u16
1836 };
1837 let def_mask = if max_def == 0 { 0 } else { get_mask(def_width) };
1838 let total_width = rep_width + def_width;
1839 match (rep, def) {
1840 (Some(rep), Some(def)) => {
1841 let iter = rep.iter().copied().zip(def.iter().copied());
1842 let def_width = def_width as usize;
1843 if total_width <= 8 {
1844 ControlWordIterator::Binary8(BinaryControlWordIterator {
1845 repdef: iter,
1846 rep_mask,
1847 def_mask,
1848 def_width,
1849 max_rep,
1850 max_visible_def,
1851 bits_rep: rep_width as u8,
1852 bits_def: def_width as u8,
1853 phantom: std::marker::PhantomData,
1854 })
1855 } else if total_width <= 16 {
1856 ControlWordIterator::Binary16(BinaryControlWordIterator {
1857 repdef: iter,
1858 rep_mask,
1859 def_mask,
1860 def_width,
1861 max_rep,
1862 max_visible_def,
1863 bits_rep: rep_width as u8,
1864 bits_def: def_width as u8,
1865 phantom: std::marker::PhantomData,
1866 })
1867 } else {
1868 ControlWordIterator::Binary32(BinaryControlWordIterator {
1869 repdef: iter,
1870 rep_mask,
1871 def_mask,
1872 def_width,
1873 max_rep,
1874 max_visible_def,
1875 bits_rep: rep_width as u8,
1876 bits_def: def_width as u8,
1877 phantom: std::marker::PhantomData,
1878 })
1879 }
1880 }
1881 (Some(lev), None) => {
1882 let iter = lev.iter().copied();
1883 if total_width <= 8 {
1884 ControlWordIterator::Unary8(UnaryControlWordIterator {
1885 repdef: iter,
1886 level_mask: rep_mask,
1887 bits_rep: total_width as u8,
1888 bits_def: 0,
1889 max_rep,
1890 phantom: std::marker::PhantomData,
1891 })
1892 } else if total_width <= 16 {
1893 ControlWordIterator::Unary16(UnaryControlWordIterator {
1894 repdef: iter,
1895 level_mask: rep_mask,
1896 bits_rep: total_width as u8,
1897 bits_def: 0,
1898 max_rep,
1899 phantom: std::marker::PhantomData,
1900 })
1901 } else {
1902 ControlWordIterator::Unary32(UnaryControlWordIterator {
1903 repdef: iter,
1904 level_mask: rep_mask,
1905 bits_rep: total_width as u8,
1906 bits_def: 0,
1907 max_rep,
1908 phantom: std::marker::PhantomData,
1909 })
1910 }
1911 }
1912 (None, Some(lev)) => {
1913 let iter = lev.iter().copied();
1914 if total_width <= 8 {
1915 ControlWordIterator::Unary8(UnaryControlWordIterator {
1916 repdef: iter,
1917 level_mask: def_mask,
1918 bits_rep: 0,
1919 bits_def: total_width as u8,
1920 max_rep: 0,
1921 phantom: std::marker::PhantomData,
1922 })
1923 } else if total_width <= 16 {
1924 ControlWordIterator::Unary16(UnaryControlWordIterator {
1925 repdef: iter,
1926 level_mask: def_mask,
1927 bits_rep: 0,
1928 bits_def: total_width as u8,
1929 max_rep: 0,
1930 phantom: std::marker::PhantomData,
1931 })
1932 } else {
1933 ControlWordIterator::Unary32(UnaryControlWordIterator {
1934 repdef: iter,
1935 level_mask: def_mask,
1936 bits_rep: 0,
1937 bits_def: total_width as u8,
1938 max_rep: 0,
1939 phantom: std::marker::PhantomData,
1940 })
1941 }
1942 }
1943 (None, None) => ControlWordIterator::Nilary(NilaryControlWordIterator { len, idx: 0 }),
1944 }
1945}
1946
1947#[derive(Copy, Clone, Debug)]
1951pub enum ControlWordParser {
1952 BOTH8(u8, u32),
1955 BOTH16(u8, u32),
1956 BOTH32(u8, u32),
1957 REP8,
1958 REP16,
1959 REP32,
1960 DEF8,
1961 DEF16,
1962 DEF32,
1963 NIL,
1964}
1965
1966impl ControlWordParser {
1967 fn parse_both<const WORD_SIZE: u8>(
1968 src: &[u8],
1969 dst_rep: &mut Vec<u16>,
1970 dst_def: &mut Vec<u16>,
1971 bits_to_shift: u8,
1972 mask_to_apply: u32,
1973 ) {
1974 match WORD_SIZE {
1975 1 => {
1976 let word = src[0];
1977 let rep = word >> bits_to_shift;
1978 let def = word & (mask_to_apply as u8);
1979 dst_rep.push(rep as u16);
1980 dst_def.push(def as u16);
1981 }
1982 2 => {
1983 let word = u16::from_le_bytes([src[0], src[1]]);
1984 let rep = word >> bits_to_shift;
1985 let def = word & mask_to_apply as u16;
1986 dst_rep.push(rep);
1987 dst_def.push(def);
1988 }
1989 4 => {
1990 let word = u32::from_le_bytes([src[0], src[1], src[2], src[3]]);
1991 let rep = word >> bits_to_shift;
1992 let def = word & mask_to_apply;
1993 dst_rep.push(rep as u16);
1994 dst_def.push(def as u16);
1995 }
1996 _ => unreachable!(),
1997 }
1998 }
1999
2000 fn parse_desc_both<const WORD_SIZE: u8>(
2001 src: &[u8],
2002 bits_to_shift: u8,
2003 mask_to_apply: u32,
2004 max_rep: u16,
2005 max_visible_def: u16,
2006 ) -> ControlWordDesc {
2007 match WORD_SIZE {
2008 1 => {
2009 let word = src[0];
2010 let rep = word >> bits_to_shift;
2011 let def = word & (mask_to_apply as u8);
2012 let is_visible = def as u16 <= max_visible_def;
2013 let is_new_row = rep as u16 == max_rep;
2014 let is_valid_item = def == 0;
2015 ControlWordDesc {
2016 is_visible,
2017 is_new_row,
2018 is_valid_item,
2019 }
2020 }
2021 2 => {
2022 let word = u16::from_le_bytes([src[0], src[1]]);
2023 let rep = word >> bits_to_shift;
2024 let def = word & mask_to_apply as u16;
2025 let is_visible = def <= max_visible_def;
2026 let is_new_row = rep == max_rep;
2027 let is_valid_item = def == 0;
2028 ControlWordDesc {
2029 is_visible,
2030 is_new_row,
2031 is_valid_item,
2032 }
2033 }
2034 4 => {
2035 let word = u32::from_le_bytes([src[0], src[1], src[2], src[3]]);
2036 let rep = word >> bits_to_shift;
2037 let def = word & mask_to_apply;
2038 let is_visible = def as u16 <= max_visible_def;
2039 let is_new_row = rep as u16 == max_rep;
2040 let is_valid_item = def == 0;
2041 ControlWordDesc {
2042 is_visible,
2043 is_new_row,
2044 is_valid_item,
2045 }
2046 }
2047 _ => unreachable!(),
2048 }
2049 }
2050
2051 fn parse_one<const WORD_SIZE: u8>(src: &[u8], dst: &mut Vec<u16>) {
2052 match WORD_SIZE {
2053 1 => {
2054 let word = src[0];
2055 dst.push(word as u16);
2056 }
2057 2 => {
2058 let word = u16::from_le_bytes([src[0], src[1]]);
2059 dst.push(word);
2060 }
2061 4 => {
2062 let word = u32::from_le_bytes([src[0], src[1], src[2], src[3]]);
2063 dst.push(word as u16);
2064 }
2065 _ => unreachable!(),
2066 }
2067 }
2068
2069 fn parse_rep_desc_one<const WORD_SIZE: u8>(src: &[u8], max_rep: u16) -> ControlWordDesc {
2070 match WORD_SIZE {
2071 1 => ControlWordDesc {
2072 is_new_row: src[0] as u16 == max_rep,
2073 is_visible: true,
2074 is_valid_item: true,
2075 },
2076 2 => ControlWordDesc {
2077 is_new_row: u16::from_le_bytes([src[0], src[1]]) == max_rep,
2078 is_visible: true,
2079 is_valid_item: true,
2080 },
2081 4 => ControlWordDesc {
2082 is_new_row: u32::from_le_bytes([src[0], src[1], src[2], src[3]]) as u16 == max_rep,
2083 is_visible: true,
2084 is_valid_item: true,
2085 },
2086 _ => unreachable!(),
2087 }
2088 }
2089
2090 fn parse_def_desc_one<const WORD_SIZE: u8>(src: &[u8]) -> ControlWordDesc {
2091 match WORD_SIZE {
2092 1 => ControlWordDesc {
2093 is_new_row: true,
2094 is_visible: true,
2095 is_valid_item: src[0] == 0,
2096 },
2097 2 => ControlWordDesc {
2098 is_new_row: true,
2099 is_visible: true,
2100 is_valid_item: u16::from_le_bytes([src[0], src[1]]) == 0,
2101 },
2102 4 => ControlWordDesc {
2103 is_new_row: true,
2104 is_visible: true,
2105 is_valid_item: u32::from_le_bytes([src[0], src[1], src[2], src[3]]) as u16 == 0,
2106 },
2107 _ => unreachable!(),
2108 }
2109 }
2110
2111 pub fn bytes_per_word(&self) -> usize {
2113 match self {
2114 Self::BOTH8(..) => 1,
2115 Self::BOTH16(..) => 2,
2116 Self::BOTH32(..) => 4,
2117 Self::REP8 => 1,
2118 Self::REP16 => 2,
2119 Self::REP32 => 4,
2120 Self::DEF8 => 1,
2121 Self::DEF16 => 2,
2122 Self::DEF32 => 4,
2123 Self::NIL => 0,
2124 }
2125 }
2126
2127 pub fn parse(&self, src: &[u8], dst_rep: &mut Vec<u16>, dst_def: &mut Vec<u16>) {
2134 match self {
2135 Self::BOTH8(bits_to_shift, mask_to_apply) => {
2136 Self::parse_both::<1>(src, dst_rep, dst_def, *bits_to_shift, *mask_to_apply)
2137 }
2138 Self::BOTH16(bits_to_shift, mask_to_apply) => {
2139 Self::parse_both::<2>(src, dst_rep, dst_def, *bits_to_shift, *mask_to_apply)
2140 }
2141 Self::BOTH32(bits_to_shift, mask_to_apply) => {
2142 Self::parse_both::<4>(src, dst_rep, dst_def, *bits_to_shift, *mask_to_apply)
2143 }
2144 Self::REP8 => Self::parse_one::<1>(src, dst_rep),
2145 Self::REP16 => Self::parse_one::<2>(src, dst_rep),
2146 Self::REP32 => Self::parse_one::<4>(src, dst_rep),
2147 Self::DEF8 => Self::parse_one::<1>(src, dst_def),
2148 Self::DEF16 => Self::parse_one::<2>(src, dst_def),
2149 Self::DEF32 => Self::parse_one::<4>(src, dst_def),
2150 Self::NIL => {}
2151 }
2152 }
2153
2154 pub fn has_rep(&self) -> bool {
2156 match self {
2157 Self::BOTH8(..)
2158 | Self::BOTH16(..)
2159 | Self::BOTH32(..)
2160 | Self::REP8
2161 | Self::REP16
2162 | Self::REP32 => true,
2163 Self::DEF8 | Self::DEF16 | Self::DEF32 | Self::NIL => false,
2164 }
2165 }
2166
2167 pub fn parse_desc(&self, src: &[u8], max_rep: u16, max_visible_def: u16) -> ControlWordDesc {
2169 match self {
2170 Self::BOTH8(bits_to_shift, mask_to_apply) => Self::parse_desc_both::<1>(
2171 src,
2172 *bits_to_shift,
2173 *mask_to_apply,
2174 max_rep,
2175 max_visible_def,
2176 ),
2177 Self::BOTH16(bits_to_shift, mask_to_apply) => Self::parse_desc_both::<2>(
2178 src,
2179 *bits_to_shift,
2180 *mask_to_apply,
2181 max_rep,
2182 max_visible_def,
2183 ),
2184 Self::BOTH32(bits_to_shift, mask_to_apply) => Self::parse_desc_both::<4>(
2185 src,
2186 *bits_to_shift,
2187 *mask_to_apply,
2188 max_rep,
2189 max_visible_def,
2190 ),
2191 Self::REP8 => Self::parse_rep_desc_one::<1>(src, max_rep),
2192 Self::REP16 => Self::parse_rep_desc_one::<2>(src, max_rep),
2193 Self::REP32 => Self::parse_rep_desc_one::<4>(src, max_rep),
2194 Self::DEF8 => Self::parse_def_desc_one::<1>(src),
2195 Self::DEF16 => Self::parse_def_desc_one::<2>(src),
2196 Self::DEF32 => Self::parse_def_desc_one::<4>(src),
2197 Self::NIL => ControlWordDesc {
2198 is_new_row: true,
2199 is_valid_item: true,
2200 is_visible: true,
2201 },
2202 }
2203 }
2204
2205 pub fn new(bits_rep: u8, bits_def: u8) -> Self {
2207 let total_bits = bits_rep + bits_def;
2208
2209 enum WordSize {
2210 One,
2211 Two,
2212 Four,
2213 }
2214
2215 let word_size = if total_bits <= 8 {
2216 WordSize::One
2217 } else if total_bits <= 16 {
2218 WordSize::Two
2219 } else {
2220 WordSize::Four
2221 };
2222
2223 match (bits_rep > 0, bits_def > 0, word_size) {
2224 (false, false, _) => Self::NIL,
2225 (false, true, WordSize::One) => Self::DEF8,
2226 (false, true, WordSize::Two) => Self::DEF16,
2227 (false, true, WordSize::Four) => Self::DEF32,
2228 (true, false, WordSize::One) => Self::REP8,
2229 (true, false, WordSize::Two) => Self::REP16,
2230 (true, false, WordSize::Four) => Self::REP32,
2231 (true, true, WordSize::One) => Self::BOTH8(bits_def, get_mask(bits_def as u16) as u32),
2232 (true, true, WordSize::Two) => Self::BOTH16(bits_def, get_mask(bits_def as u16) as u32),
2233 (true, true, WordSize::Four) => {
2234 Self::BOTH32(bits_def, get_mask(bits_def as u16) as u32)
2235 }
2236 }
2237 }
2238}
2239
2240#[cfg(test)]
2241mod tests {
2242 use arrow_buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
2243
2244 use crate::repdef::{
2245 CompositeRepDefUnraveler, DefinitionInterpretation, RepDefUnraveler, SerializedRepDefs,
2246 };
2247
2248 use super::RepDefBuilder;
2249
2250 fn validity(values: &[bool]) -> NullBuffer {
2251 NullBuffer::from_iter(values.iter().copied())
2252 }
2253
2254 fn offsets_32(values: &[i32]) -> OffsetBuffer<i32> {
2255 OffsetBuffer::<i32>::new(ScalarBuffer::from_iter(values.iter().copied()))
2256 }
2257
2258 fn offsets_64(values: &[i64]) -> OffsetBuffer<i64> {
2259 OffsetBuffer::<i64>::new(ScalarBuffer::from_iter(values.iter().copied()))
2260 }
2261
/// Two offset layers (both with validity) over a nullable item layer: checks the
/// serialized levels and that unraveling reproduces every layer.
#[test]
fn test_repdef_basic() {
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_offsets(
        offsets_64(&[0, 2, 2, 5]),
        Some(validity(&[true, false, true])),
    );
    repdef_builder.add_offsets(
        offsets_64(&[0, 1, 3, 5, 5, 9]),
        Some(validity(&[true, true, true, false, true])),
    );
    repdef_builder.add_validity_bitmap(validity(&[
        true, true, true, false, false, false, true, true, false,
    ]));

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);
    let rep = serialized.repetition_levels.unwrap();
    let def = serialized.definition_levels.unwrap();

    assert_eq!(vec![0, 0, 0, 3, 1, 1, 2, 1, 0, 0, 1], *def);
    assert_eq!(vec![2, 1, 0, 2, 2, 0, 1, 1, 0, 0, 0], *rep);

    let single = RepDefUnraveler::new(
        Some(rep.as_ref().to_vec()),
        Some(def.as_ref().to_vec()),
        serialized.def_meaning.into(),
        9,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![single]);

    // The item validity comes out first.
    assert_eq!(
        unraveler.unravel_validity(9),
        Some(validity(&[
            true, true, true, false, false, false, true, true, false
        ]))
    );

    // Then the offset layers, in the reverse of the order they were added.
    let (inner_offsets, inner_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    assert_eq!(
        inner_offsets.inner(),
        offsets_32(&[0, 1, 3, 5, 5, 9]).inner()
    );
    assert_eq!(
        inner_validity,
        Some(validity(&[true, true, true, false, true]))
    );

    let (outer_offsets, outer_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    assert_eq!(outer_offsets.inner(), offsets_32(&[0, 2, 2, 5]).inner());
    assert_eq!(outer_validity, Some(validity(&[true, false, true])));
}
2309
/// The same offsets serialize to the same levels whether the zero-length middle list is
/// marked null (validity given) or not (no validity); only the reported list
/// interpretation differs.
#[test]
fn test_repdef_simple_null_empty_list() {
    let assert_levels = |serialized: SerializedRepDefs, list_meaning: DefinitionInterpretation| {
        let rep = serialized.repetition_levels.unwrap();
        let def = serialized.definition_levels.unwrap();

        assert_eq!([1, 0, 1, 1, 0, 0], *rep);
        assert_eq!([0, 0, 2, 0, 1, 0], *def);
        assert_eq!(
            vec![DefinitionInterpretation::NullableItem, list_meaning,],
            serialized.def_meaning
        );
    };

    // With list validity: the middle list is null.
    let mut with_validity = RepDefBuilder::default();
    with_validity.add_offsets(
        offsets_32(&[0, 2, 2, 5]),
        Some(validity(&[true, false, true])),
    );
    with_validity.add_validity_bitmap(validity(&[true, true, true, false, true]));

    assert_levels(
        RepDefBuilder::serialize(vec![with_validity]),
        DefinitionInterpretation::NullableList,
    );

    // Without list validity: the middle list has length zero.
    let mut without_validity = RepDefBuilder::default();
    without_validity.add_offsets(offsets_32(&[0, 2, 2, 5]), None);
    without_validity.add_validity_bitmap(validity(&[true, true, true, false, true]));

    assert_levels(
        RepDefBuilder::serialize(vec![without_validity]),
        DefinitionInterpretation::EmptyableList,
    );
}
2347
/// A zero-length list in the last position still gets its own rep/def entry.
#[test]
fn test_repdef_empty_list_at_end() {
    let mut repdef_builder = RepDefBuilder::default();
    // The final list (offsets 5..5) has length zero.
    repdef_builder.add_offsets(offsets_32(&[0, 2, 5, 5]), None);
    repdef_builder.add_validity_bitmap(validity(&[true, true, true, false, true]));

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    let rep = serialized.repetition_levels.unwrap();
    let def = serialized.definition_levels.unwrap();

    assert_eq!([1, 0, 1, 0, 0, 1], *rep);
    assert_eq!([0, 0, 0, 1, 0, 2], *def);
    assert_eq!(
        vec![
            DefinitionInterpretation::NullableItem,
            DefinitionInterpretation::EmptyableList,
        ],
        serialized.def_meaning
    );
}
2370
/// A list marked null whose offsets still span a non-empty range (2..5 here).
#[test]
fn test_repdef_abnormal_nulls() {
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_offsets(
        offsets_32(&[0, 2, 5, 8]),
        Some(validity(&[true, false, true])),
    );
    // Only 5 items are registered although the offsets run up to 8.
    repdef_builder.add_no_null(5);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    let rep = serialized.repetition_levels.unwrap();
    let def = serialized.definition_levels.unwrap();

    assert_eq!([1, 0, 1, 1, 0, 0], *rep);
    assert_eq!([0, 0, 1, 0, 0, 0], *def);

    assert_eq!(
        vec![
            DefinitionInterpretation::AllValidItem,
            DefinitionInterpretation::NullableList,
        ],
        serialized.def_meaning
    );
}
2400
/// Two fixed-size-list layers (outer one nullable) over nullable items: no repetition
/// levels are produced and each layer's validity can be unraveled again.
#[test]
fn test_repdef_fsl() {
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_fsl(Some(validity(&[true, false])), 2, 2);
    repdef_builder.add_fsl(None, 2, 4);
    repdef_builder.add_validity_bitmap(validity(&[
        true, false, true, false, true, false, true, false,
    ]));

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    assert_eq!(
        vec![
            DefinitionInterpretation::NullableItem,
            DefinitionInterpretation::AllValidItem,
            DefinitionInterpretation::NullableItem
        ],
        serialized.def_meaning
    );

    assert!(serialized.repetition_levels.is_none());

    let def = serialized.definition_levels.unwrap();

    assert_eq!([0, 1, 0, 1, 2, 2, 2, 2], *def);

    let single = RepDefUnraveler::new(
        None,
        Some(def.as_ref().to_vec()),
        serialized.def_meaning.into(),
        8,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![single]);

    // Item layer first.
    assert_eq!(
        unraveler.unravel_validity(8),
        Some(validity(&[
            true, false, true, false, false, false, false, false
        ]))
    );
    // Then the fixed-size-list layer added without validity ...
    assert_eq!(unraveler.unravel_fsl_validity(4, 2), None);
    // ... and finally the one added with validity.
    assert_eq!(
        unraveler.unravel_fsl_validity(2, 2),
        Some(validity(&[true, false]))
    );
}
2446
/// Same fixed-size-list layout as `test_repdef_fsl`, but the item layer has no nulls.
#[test]
fn test_repdef_fsl_allvalid_item() {
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_fsl(Some(validity(&[true, false])), 2, 2);
    repdef_builder.add_fsl(None, 2, 4);
    repdef_builder.add_no_null(8);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    assert_eq!(
        vec![
            DefinitionInterpretation::AllValidItem,
            DefinitionInterpretation::AllValidItem,
            DefinitionInterpretation::NullableItem
        ],
        serialized.def_meaning
    );

    assert!(serialized.repetition_levels.is_none());

    let def = serialized.definition_levels.unwrap();

    assert_eq!([0, 0, 0, 0, 1, 1, 1, 1], *def);

    let single = RepDefUnraveler::new(
        None,
        Some(def.as_ref().to_vec()),
        serialized.def_meaning.into(),
        8,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![single]);

    // The first two layers unravel to "no null buffer"; only the last has nulls.
    assert_eq!(unraveler.unravel_validity(8), None);
    assert_eq!(unraveler.unravel_fsl_validity(4, 2), None);
    assert_eq!(
        unraveler.unravel_fsl_validity(2, 2),
        Some(validity(&[true, false]))
    );
}
2485
/// Offsets that do not start at zero (5, 7, 7, 10) give the expected levels.
#[test]
fn test_repdef_sliced_offsets() {
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_offsets(
        offsets_32(&[5, 7, 7, 10]),
        Some(validity(&[true, false, true])),
    );
    repdef_builder.add_no_null(5);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    let rep = serialized.repetition_levels.unwrap();
    let def = serialized.definition_levels.unwrap();

    assert_eq!([1, 0, 1, 1, 0, 0], *rep);
    assert_eq!([0, 0, 1, 0, 0, 0], *def);

    assert_eq!(
        vec![
            DefinitionInterpretation::AllValidItem,
            DefinitionInterpretation::NullableList,
        ],
        serialized.def_meaning
    );
}
2513
/// Two nested offset layers that each mix null and zero-length lists.
#[test]
fn test_repdef_complex_null_empty() {
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_offsets(
        offsets_32(&[0, 4, 4, 4, 6]),
        Some(validity(&[true, false, true, true])),
    );
    repdef_builder.add_offsets(
        offsets_32(&[0, 1, 1, 2, 2, 2, 3]),
        Some(validity(&[true, false, true, false, true, true])),
    );
    repdef_builder.add_no_null(3);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    let rep = serialized.repetition_levels.unwrap();
    let def = serialized.definition_levels.unwrap();

    assert_eq!([2, 1, 1, 1, 2, 2, 2, 1], *rep);
    assert_eq!([0, 1, 0, 1, 3, 4, 2, 0], *def);
}
2535
/// Zero-length lists without any list validity round-trip through serialize/unravel.
#[test]
fn test_repdef_empty_list_no_null() {
    let mut repdef_builder = RepDefBuilder::default();
    // The second and third lists (offsets 4..4) have length zero.
    repdef_builder.add_offsets(offsets_32(&[0, 4, 4, 4, 6]), None);
    repdef_builder.add_no_null(6);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    let rep = serialized.repetition_levels.unwrap();
    let def = serialized.definition_levels.unwrap();

    assert_eq!([1, 0, 0, 0, 1, 1, 1, 0], *rep);
    assert_eq!([0, 0, 0, 0, 1, 1, 0, 0], *def);

    let single = RepDefUnraveler::new(
        Some(rep.as_ref().to_vec()),
        Some(def.as_ref().to_vec()),
        serialized.def_meaning.into(),
        8,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![single]);

    assert_eq!(unraveler.unravel_validity(6), None);
    let (list_offsets, list_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    assert_eq!(list_offsets.inner(), offsets_32(&[0, 4, 4, 4, 6]).inner());
    assert_eq!(list_validity, None);
}
2564
/// With no nulls and no zero-length lists, no definition levels are produced and both
/// offset layers round-trip.
#[test]
fn test_repdef_all_valid() {
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_offsets(offsets_64(&[0, 2, 3, 5]), None);
    repdef_builder.add_offsets(offsets_64(&[0, 1, 3, 5, 7, 9]), None);
    repdef_builder.add_no_null(9);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);
    let rep = serialized.repetition_levels.unwrap();
    assert!(serialized.definition_levels.is_none());

    assert_eq!([2, 1, 0, 2, 0, 2, 0, 1, 0], *rep);

    let single = RepDefUnraveler::new(
        Some(rep.as_ref().to_vec()),
        None,
        serialized.def_meaning.into(),
        9,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![single]);

    assert_eq!(unraveler.unravel_validity(9), None);

    // Offset layers come back in the reverse of the order they were added.
    let (inner_offsets, inner_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    assert_eq!(
        inner_offsets.inner(),
        offsets_32(&[0, 1, 3, 5, 7, 9]).inner()
    );
    assert_eq!(inner_validity, None);

    let (outer_offsets, outer_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    assert_eq!(outer_offsets.inner(), offsets_32(&[0, 2, 3, 5]).inner());
    assert_eq!(outer_validity, None);
}
2593
/// A list layer with zero-length lists but no validity bitmap: unraveling yields the
/// original offsets and no list validity.
#[test]
fn test_only_empty_lists() {
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_offsets(offsets_32(&[0, 4, 4, 4, 6]), None);
    repdef_builder.add_no_null(6);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    let rep = serialized.repetition_levels.unwrap();
    let def = serialized.definition_levels.unwrap();

    assert_eq!([1, 0, 0, 0, 1, 1, 1, 0], *rep);
    assert_eq!([0, 0, 0, 0, 1, 1, 0, 0], *def);

    let single = RepDefUnraveler::new(
        Some(rep.as_ref().to_vec()),
        Some(def.as_ref().to_vec()),
        serialized.def_meaning.into(),
        8,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![single]);

    assert_eq!(unraveler.unravel_validity(6), None);
    let (list_offsets, list_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    assert_eq!(list_offsets.inner(), offsets_32(&[0, 4, 4, 4, 6]).inner());
    assert_eq!(list_validity, None);
}
2620
/// Every zero-length list in the layer is marked null: unraveling yields the original
/// offsets together with the original list validity.
#[test]
fn test_only_null_lists() {
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_offsets(
        offsets_32(&[0, 4, 4, 4, 6]),
        Some(validity(&[true, false, false, true])),
    );
    repdef_builder.add_no_null(6);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    let rep = serialized.repetition_levels.unwrap();
    let def = serialized.definition_levels.unwrap();

    assert_eq!([1, 0, 0, 0, 1, 1, 1, 0], *rep);
    assert_eq!([0, 0, 0, 0, 1, 1, 0, 0], *def);

    let single = RepDefUnraveler::new(
        Some(rep.as_ref().to_vec()),
        Some(def.as_ref().to_vec()),
        serialized.def_meaning.into(),
        8,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![single]);

    assert_eq!(unraveler.unravel_validity(6), None);
    let (list_offsets, list_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    assert_eq!(list_offsets.inner(), offsets_32(&[0, 4, 4, 4, 6]).inner());
    assert_eq!(list_validity, Some(validity(&[true, false, false, true])));
}
2650
// Round-trips a single list layer that contains both a null list and a
// valid-but-empty list, which must stay distinguishable after unraveling.
#[test]
fn test_null_and_empty_lists() {
    // Four lists over six items; the second list is null, the third is empty.
    let mut repdef_builder = RepDefBuilder::default();
    let list_offsets = offsets_32(&[0, 4, 4, 4, 6]);
    let list_nulls = validity(&[true, false, true, true]);
    repdef_builder.add_offsets(list_offsets, Some(list_nulls));
    repdef_builder.add_no_null(6);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);
    let rep_levels = serialized.repetition_levels.unwrap();
    let def_levels = serialized.definition_levels.unwrap();

    // The null list is recorded with def = 1 and the empty list with def = 2.
    assert_eq!([1, 0, 0, 0, 1, 1, 1, 0], *rep_levels);
    assert_eq!([0, 0, 0, 0, 1, 2, 0, 0], *def_levels);

    let level_unraveler = RepDefUnraveler::new(
        Some(rep_levels.to_vec()),
        Some(def_levels.to_vec()),
        serialized.def_meaning.into(),
        8,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![level_unraveler]);

    // The item layer was added with `add_no_null`, so no validity comes back.
    assert_eq!(unraveler.unravel_validity(6), None);

    // Only the null list is marked invalid; the empty list stays valid.
    let (offsets, list_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    let expected_offsets = offsets_32(&[0, 4, 4, 4, 6]);
    assert_eq!(offsets.inner(), expected_offsets.inner());
    assert_eq!(list_validity, Some(validity(&[true, false, true, true])));
}
2680
// Serializes validity-only layers (no offsets) and checks that only definition
// levels are produced and that each layer can be unraveled again.
#[test]
fn test_repdef_no_rep() {
    // Three layers over five items: one without nulls, then two bitmaps.
    let mut repdef_builder = RepDefBuilder::default();
    repdef_builder.add_no_null(5);
    repdef_builder.add_validity_bitmap(validity(&[false, false, true, true, true]));
    repdef_builder.add_validity_bitmap(validity(&[false, true, true, true, false]));

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);

    // No offsets were added, so there are no repetition levels.
    assert!(serialized.repetition_levels.is_none());
    let def_levels = serialized.definition_levels.unwrap();

    // Items nulled by the first bitmap get def = 2; the item nulled only by
    // the second bitmap gets def = 1.
    assert_eq!([2, 2, 0, 0, 1], *def_levels);

    let level_unraveler = RepDefUnraveler::new(
        None,
        Some(def_levels.to_vec()),
        serialized.def_meaning.into(),
        5,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![level_unraveler]);

    // Layers come back in the reverse of the order they were added. An item
    // nulled by the first bitmap also reads as null in the last-added layer.
    let last_added = unraveler.unravel_validity(5);
    assert_eq!(
        last_added,
        Some(validity(&[false, false, true, true, false]))
    );

    let first_added = unraveler.unravel_validity(5);
    assert_eq!(
        first_added,
        Some(validity(&[false, false, true, true, true]))
    );

    // The `add_no_null` layer has no validity at all.
    let no_null_layer = unraveler.unravel_validity(5);
    assert_eq!(no_null_layer, None);
}
2711
// Unravels two independently serialized sets of levels through a single
// composite unraveler and checks that the results are stitched together.
#[test]
fn test_composite_unravel() {
    // First set: three lists over five items, the middle list being null.
    let mut first_builder = RepDefBuilder::default();
    let first_offsets = offsets_64(&[0, 2, 2, 5]);
    let first_nulls = validity(&[true, false, true]);
    first_builder.add_offsets(first_offsets, Some(first_nulls));
    first_builder.add_no_null(5);
    let repdef1 = RepDefBuilder::serialize(vec![first_builder]);

    // Second set: five lists over nine items with no nulls anywhere.
    let mut second_builder = RepDefBuilder::default();
    second_builder.add_offsets(offsets_64(&[0, 1, 3, 5, 7, 9]), None);
    second_builder.add_no_null(9);
    let repdef2 = RepDefBuilder::serialize(vec![second_builder]);

    // Clone the levels for inspection; the originals are handed to the
    // unravelers further down.
    let rep1 = repdef1.repetition_levels.clone().unwrap();
    let def1 = repdef1.definition_levels.clone().unwrap();
    let rep2 = repdef2.repetition_levels.clone().unwrap();
    // The second set has no nulls and no empty lists, so no definition levels.
    assert!(repdef2.definition_levels.is_none());

    assert_eq!([1, 0, 1, 1, 0, 0], *rep1);
    assert_eq!([0, 0, 1, 0, 0, 0], *def1);
    assert_eq!([1, 1, 0, 1, 0, 1, 0, 1, 0], *rep2);

    let first_unraveler = RepDefUnraveler::new(
        repdef1.repetition_levels.map(|levels| levels.to_vec()),
        repdef1.definition_levels.map(|levels| levels.to_vec()),
        repdef1.def_meaning.into(),
        5,
    );
    let second_unraveler = RepDefUnraveler::new(
        repdef2.repetition_levels.map(|levels| levels.to_vec()),
        repdef2.definition_levels.map(|levels| levels.to_vec()),
        repdef2.def_meaning.into(),
        9,
    );

    let mut unraveler = CompositeRepDefUnraveler::new(vec![first_unraveler, second_unraveler]);

    // Neither set has item-level nulls.
    assert!(unraveler.unravel_validity(9).is_none());

    // The second set's offsets continue from where the first set's ended (5),
    // and its lists are reported as valid in the combined validity.
    let (offsets, list_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    let expected_offsets = offsets_32(&[0, 2, 2, 5, 6, 8, 10, 12, 14]);
    assert_eq!(offsets.inner(), expected_offsets.inner());
    assert_eq!(
        list_validity,
        Some(validity(&[true, false, true, true, true, true, true, true]))
    );
}
2762
// Serializes two builders in one call and checks the concatenated
// repetition and definition levels.
#[test]
fn test_repdef_multiple_builders() {
    // First builder: one outer list holding two inner lists (1 and 2 items),
    // with every list and item valid.
    let mut all_valid = RepDefBuilder::default();
    all_valid.add_offsets(offsets_64(&[0, 2]), None);
    all_valid.add_offsets(offsets_64(&[0, 1, 3]), None);
    all_valid.add_validity_bitmap(validity(&[true, true, true]));

    // Second builder: a null outer list, then an outer list of three inner
    // lists (2 items, null, 4 items) in which four of the six items are null.
    let mut with_nulls = RepDefBuilder::default();
    let outer_nulls = validity(&[false, true]);
    with_nulls.add_offsets(offsets_64(&[0, 0, 3]), Some(outer_nulls));
    let inner_nulls = validity(&[true, false, true]);
    with_nulls.add_offsets(offsets_64(&[0, 2, 2, 6]), Some(inner_nulls));
    with_nulls.add_validity_bitmap(validity(&[false, false, false, true, true, false]));

    let serialized = RepDefBuilder::serialize(vec![all_valid, with_nulls]);

    let rep_levels = serialized.repetition_levels.unwrap();
    let def_levels = serialized.definition_levels.unwrap();

    // The first three levels come from the first builder, the remaining eight
    // from the second.
    assert_eq!([2, 1, 0, 2, 2, 0, 1, 1, 0, 0, 0], *rep_levels);
    assert_eq!([0, 0, 0, 3, 1, 1, 2, 1, 0, 0, 1], *def_levels);
}
2787
// Slices the serialized repetition and definition levels by item count and
// checks the size of each slice.
//
// The asserted lengths are presumably byte counts (levels are `u16`, so two
// bytes per level) -- confirm against the slicer's return type.
#[test]
fn test_slicer() {
    // Four lists over 30 items: a 2-item list, a null list, a 28-item list
    // and an empty list.
    let mut builder = RepDefBuilder::default();
    builder.add_offsets(
        offsets_64(&[0, 2, 2, 30, 30]),
        Some(validity(&[true, false, true, true])),
    );
    builder.add_no_null(30);

    let repdefs = RepDefBuilder::serialize(vec![builder]);

    let mut rep_slicer = repdefs.rep_slicer().unwrap();

    // The first 5 items span the null list, giving 6 levels (12).
    assert_eq!(rep_slicer.slice_next(5).len(), 12);
    // The next 20 items are all inside the 28-item list: 20 levels (40).
    assert_eq!(rep_slicer.slice_next(20).len(), 40);
    // The last 5 items are followed by the empty list: 6 levels (12).
    assert_eq!(rep_slicer.slice_rest().len(), 12);

    // Slice the definition levels as well. This must use `def_slicer`;
    // building it from `rep_slicer` would only re-test the repetition levels.
    let mut def_slicer = repdefs.def_slicer().unwrap();

    // The definition levels have one entry per repetition level, so the
    // expected sizes are the same as above.
    assert_eq!(def_slicer.slice_next(5).len(), 12);
    assert_eq!(def_slicer.slice_next(20).len(), 40);
    assert_eq!(def_slicer.slice_rest().len(), 12);
}
2817
// Encodes rep/def levels into control words, compares the bytes against a
// hand-written expectation, then parses the bytes back into levels.
#[test]
fn test_control_words() {
    // Encodes `rep`/`def`, checks the iterator's reported layout and output
    // bytes, and verifies that parsing the bytes restores the input levels.
    // An empty slice means "this kind of level is absent".
    fn check(
        rep: &[u16],
        def: &[u16],
        expected_values: Vec<u8>,
        expected_bytes_per_word: usize,
        expected_bits_rep: u8,
        expected_bits_def: u8,
    ) {
        let level_count = rep.len().max(def.len());
        let max_rep = rep.iter().copied().max().unwrap_or(0);
        let max_def = def.iter().copied().max().unwrap_or(0);

        let rep_levels = (!rep.is_empty()).then_some(rep);
        let def_levels = (!def.is_empty()).then_some(def);

        // NOTE(review): the final argument is the byte length of the expected
        // output rather than the number of levels -- confirm this is what
        // `build_control_word_iterator` expects.
        let mut iter = super::build_control_word_iterator(
            rep_levels,
            max_rep,
            def_levels,
            max_def,
            max_def + 1,
            expected_values.len(),
        );
        assert_eq!(iter.bytes_per_word(), expected_bytes_per_word);
        assert_eq!(iter.bits_rep(), expected_bits_rep);
        assert_eq!(iter.bits_def(), expected_bits_def);

        // Drain exactly one word per level; the iterator must then be done.
        let mut encoded = Vec::with_capacity(level_count * iter.bytes_per_word());
        for _ in 0..level_count {
            iter.append_next(&mut encoded);
        }
        assert!(iter.append_next(&mut encoded).is_none());

        assert_eq!(expected_values, encoded);

        let parser = super::ControlWordParser::new(expected_bits_rep, expected_bits_def);

        let mut decoded_rep = Vec::with_capacity(level_count);
        let mut decoded_def = Vec::with_capacity(level_count);

        // `chunks_exact` panics on a chunk size of zero, so skip decoding
        // entirely when no bytes are written per word.
        if expected_bytes_per_word > 0 {
            for word in encoded.chunks_exact(expected_bytes_per_word) {
                parser.parse(word, &mut decoded_rep, &mut decoded_def);
            }
        }

        assert_eq!(rep, decoded_rep.as_slice());
        assert_eq!(def, decoded_def.as_slice());
    }

    // Rep and def each fit in 4 bits, so one word is one byte with rep in the
    // high nibble and def in the low nibble.
    let rep = &[0_u16, 7, 3, 2, 9, 8, 12, 5];
    let def = &[5_u16, 3, 1, 2, 12, 15, 0, 2];
    let expected = vec![
        0b00000101, // rep 0, def 5
        0b01110011, // rep 7, def 3
        0b00110001, // rep 3, def 1
        0b00100010, // rep 2, def 2
        0b10011100, // rep 9, def 12
        0b10001111, // rep 8, def 15
        0b11000000, // rep 12, def 0
        0b01010010, // rep 5, def 2
    ];
    check(rep, def, expected, 1, 4, 4);

    // A def of 22 needs 5 bits, so each word grows to two bytes. The expected
    // bytes store (rep << 5) | def with the low byte first.
    let rep = &[0_u16, 7, 3, 2, 9, 8, 12, 5];
    let def = &[5_u16, 3, 1, 2, 12, 22, 0, 2];
    let expected = vec![
        0b00000101, 0b00000000, // rep 0, def 5
        0b11100011, 0b00000000, // rep 7, def 3
        0b01100001, 0b00000000, // rep 3, def 1
        0b01000010, 0b00000000, // rep 2, def 2
        0b00101100, 0b00000001, // rep 9, def 12
        0b00010110, 0b00000001, // rep 8, def 22
        0b10000000, 0b00000001, // rep 12, def 0
        0b10100010, 0b00000000, // rep 5, def 2
    ];
    check(rep, def, expected, 2, 4, 5);

    // With only one kind of level present, each word is just that level.
    let levels = &[0_u16, 7, 3, 2, 9, 8, 12, 5];
    let expected = vec![
        0b00000000, 0b00000111, 0b00000011, 0b00000010, 0b00001001, 0b00001000, 0b00001100,
        0b00000101,
    ];
    // Rep only
    check(levels, &[], expected.clone(), 1, 4, 0);

    // Def only
    check(&[], levels, expected, 1, 0, 4);

    // Neither rep nor def: nothing is written at all.
    check(&[], &[], Vec::default(), 0, 0, 0);
}
2921
// Checks the per-word `is_new_row` / `is_visible` flags reported by the
// control word iterator for each combination of rep/def being present.
#[test]
fn test_control_words_rep_index() {
    // Builds an iterator over `rep`/`def` (an empty slice means "absent") and
    // compares the descriptor of every appended word against the expected
    // flags. The iterator must be exhausted after the expected word count.
    fn check(
        rep: &[u16],
        def: &[u16],
        expected_new_rows: Vec<bool>,
        expected_is_visible: Vec<bool>,
    ) {
        let level_count = rep.len().max(def.len());
        let max_rep = rep.iter().copied().max().unwrap_or(0);
        let max_def = def.iter().copied().max().unwrap_or(0);

        let rep_levels = (!rep.is_empty()).then_some(rep);
        let def_levels = (!def.is_empty()).then_some(def);

        // The fifth argument is fixed at 2 for every case in this test; with
        // both rep and def present, the one level whose def is 3 is the only
        // word expected to be invisible.
        let mut iter = super::build_control_word_iterator(
            rep_levels,
            max_rep,
            def_levels,
            max_def,
            2,
            expected_new_rows.len(),
        );

        let mut encoded = Vec::with_capacity(level_count * iter.bytes_per_word());
        for (idx, want_new_row) in expected_new_rows.iter().copied().enumerate() {
            let word_desc = iter.append_next(&mut encoded).unwrap();
            assert_eq!(word_desc.is_new_row, want_new_row);
            assert_eq!(word_desc.is_visible, expected_is_visible[idx]);
        }
        assert!(iter.append_next(&mut encoded).is_none());
    }

    let rep = &[2_u16, 1, 0, 2, 2, 0, 1, 1, 0, 2, 0];
    let def = &[0_u16, 0, 0, 3, 1, 1, 2, 1, 0, 0, 1];

    // Rep and def: a new row starts wherever rep equals its maximum (2), and
    // the word whose def is 3 is not visible.
    check(
        rep,
        def,
        vec![
            true, false, false, true, true, false, false, false, false, true, false,
        ],
        vec![
            true, true, true, false, true, true, true, true, true, true, true,
        ],
    );
    // Rep only: same row boundaries, and every word is visible.
    check(
        rep,
        &[],
        vec![
            true, false, false, true, true, false, false, false, false, true, false,
        ],
        vec![true; 11],
    );
    // Def only: every word is a new, visible row.
    check(
        &[],
        def,
        vec![
            true, true, true, true, true, true, true, true, true, true, true,
        ],
        vec![true; 11],
    );
    // Neither rep nor def: every word is still a new, visible row.
    check(
        &[],
        &[],
        vec![
            true, true, true, true, true, true, true, true, true, true, true,
        ],
        vec![true; 11],
    );
}
3001
// Regression test: a validity layer wrapped around a list layer in which
// every list is null and there are no items at all.
#[test]
fn regress_empty_list_case() {
    let mut repdef_builder = RepDefBuilder::default();
    // Outer validity layer: three entries, the middle one null.
    repdef_builder.add_validity_bitmap(validity(&[true, false, true]));
    // Three lists, all zero-length and all null.
    let list_offsets = offsets_32(&[0, 0, 0, 0]);
    let list_nulls = validity(&[false, false, false]);
    repdef_builder.add_offsets(list_offsets, Some(list_nulls));
    // Zero items underneath the lists.
    repdef_builder.add_no_null(0);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);
    let rep_levels = serialized.repetition_levels.unwrap();
    let def_levels = serialized.definition_levels.unwrap();

    // One level per list. The entry that is null in the outer layer gets
    // def = 2; the other two get def = 1.
    assert_eq!([1, 1, 1], *rep_levels);
    assert_eq!([1, 2, 1], *def_levels);

    let level_unraveler = RepDefUnraveler::new(
        Some(rep_levels.to_vec()),
        Some(def_levels.to_vec()),
        serialized.def_meaning.into(),
        0,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![level_unraveler]);

    // Unravel from the innermost layer outwards: items, lists, outer validity.
    assert_eq!(unraveler.unravel_validity(0), None);

    let (offsets, list_validity) = unraveler.unravel_offsets::<i32>().unwrap();
    let expected_offsets = offsets_32(&[0, 0, 0, 0]);
    assert_eq!(offsets.inner(), expected_offsets.inner());
    assert_eq!(list_validity, Some(validity(&[false, false, false])));

    let outer_validity = unraveler.unravel_validity(3).unwrap();
    let expected_outer = validity(&[true, false, true]);
    assert_eq!(outer_validity.inner(), expected_outer.inner());
}
3034
// Regression test: two nested list layers where both the outer and the
// inner layer end with a null list.
#[test]
fn regress_list_ends_null_case() {
    let mut repdef_builder = RepDefBuilder::default();
    // Outer layer: three lists, the last one null.
    let outer_offsets = offsets_64(&[0, 1, 2, 2]);
    let outer_nulls = validity(&[true, true, false]);
    repdef_builder.add_offsets(outer_offsets, Some(outer_nulls));
    // Inner layer: two lists, the last one null.
    let inner_offsets = offsets_64(&[0, 1, 1]);
    let inner_nulls = validity(&[true, false]);
    repdef_builder.add_offsets(inner_offsets, Some(inner_nulls));
    // A single item at the bottom.
    repdef_builder.add_no_null(1);

    let serialized = RepDefBuilder::serialize(vec![repdef_builder]);
    let rep_levels = serialized.repetition_levels.unwrap();
    let def_levels = serialized.definition_levels.unwrap();

    // Three levels: the item (def 0), the null inner list (def 1) and the
    // null outer list (def 2).
    assert_eq!([2, 2, 2], *rep_levels);
    assert_eq!([0, 1, 2], *def_levels);

    let level_unraveler = RepDefUnraveler::new(
        Some(rep_levels.to_vec()),
        Some(def_levels.to_vec()),
        serialized.def_meaning.into(),
        1,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![level_unraveler]);

    assert_eq!(unraveler.unravel_validity(1), None);

    // The inner list layer comes back first...
    let (inner_off, inner_val) = unraveler.unravel_offsets::<i32>().unwrap();
    let expected_inner = offsets_32(&[0, 1, 1]);
    assert_eq!(inner_off.inner(), expected_inner.inner());
    assert_eq!(inner_val, Some(validity(&[true, false])));

    // ...followed by the outer list layer.
    let (outer_off, outer_val) = unraveler.unravel_offsets::<i32>().unwrap();
    let expected_outer = offsets_32(&[0, 1, 2, 2]);
    assert_eq!(outer_off.inner(), expected_outer.inner());
    assert_eq!(outer_val, Some(validity(&[true, true, false])));
}
3067
// Combines unravelers whose definition interpretations differ and checks
// that the composite still produces one coherent validity / offsets result.
#[test]
fn test_mixed_unraveler() {
    // Case 1: a nullable-item unraveler followed by an all-valid one that
    // carries no levels at all.
    let nullable_items = RepDefUnraveler::new(
        None,
        Some(vec![0, 1, 0, 1]),
        vec![DefinitionInterpretation::NullableItem].into(),
        4,
    );
    let all_valid_items = RepDefUnraveler::new(
        None,
        None,
        vec![DefinitionInterpretation::AllValidItem].into(),
        4,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![nullable_items, all_valid_items]);

    // The all-valid half contributes four `true` entries to the combined validity.
    assert_eq!(
        unraveler.unravel_validity(8),
        Some(validity(&[
            true, false, true, false, true, true, true, true
        ]))
    );

    // Case 2: list layers with different interpretations. The first half has
    // nullable items inside emptyable lists; the second has all-valid items
    // inside nullable lists.
    let def1 = Some(vec![0, 1, 2]);
    let rep1 = Some(vec![1, 0, 1]);

    let def2 = Some(vec![1, 0, 0]);
    let rep2 = Some(vec![1, 1, 0]);

    let emptyable_half = RepDefUnraveler::new(
        rep1,
        def1,
        vec![
            DefinitionInterpretation::NullableItem,
            DefinitionInterpretation::EmptyableList,
        ]
        .into(),
        2,
    );
    let nullable_half = RepDefUnraveler::new(
        rep2,
        def2,
        vec![
            DefinitionInterpretation::AllValidItem,
            DefinitionInterpretation::NullableList,
        ]
        .into(),
        2,
    );
    let mut unraveler = CompositeRepDefUnraveler::new(vec![emptyable_half, nullable_half]);

    // Item validity: one null item in the first half, none in the second.
    assert_eq!(
        unraveler.unravel_validity(4),
        Some(validity(&[true, false, true, true]))
    );

    // Lists: a 2-item list and an empty list from the first half, then a
    // null list and a 2-item list from the second half.
    let expected_lists = (
        offsets_32(&[0, 2, 2, 2, 4]),
        Some(validity(&[true, true, false, true])),
    );
    assert_eq!(unraveler.unravel_offsets::<i32>().unwrap(), expected_lists);
}
3138}