1use std::{ops::Range, sync::Arc};
5
6use arrow_array::{cast::AsArray, Array, ArrayRef, LargeListArray, ListArray};
7use arrow_schema::DataType;
8use futures::future::BoxFuture;
9use lance_arrow::deepcopy::deep_copy_nulls;
10use lance_arrow::list::ListArrayExt;
11use lance_core::Result;
12
13use crate::{
14 decoder::{
15 DecodedArray, FilterExpression, ScheduledScanLine, SchedulerContext,
16 StructuralDecodeArrayTask, StructuralFieldDecoder, StructuralFieldScheduler,
17 StructuralSchedulingJob,
18 },
19 encoder::{EncodeTask, FieldEncoder, OutOfLineBuffers},
20 repdef::RepDefBuilder,
21};
22
23pub struct ListStructuralEncoder {
31 keep_original_array: bool,
32 child: Box<dyn FieldEncoder>,
33}
34
35impl ListStructuralEncoder {
36 pub fn new(keep_original_array: bool, child: Box<dyn FieldEncoder>) -> Self {
37 Self {
38 keep_original_array,
39 child,
40 }
41 }
42}
43
44impl FieldEncoder for ListStructuralEncoder {
45 fn maybe_encode(
46 &mut self,
47 array: ArrayRef,
48 external_buffers: &mut OutOfLineBuffers,
49 mut repdef: RepDefBuilder,
50 row_number: u64,
51 num_rows: u64,
52 ) -> Result<Vec<EncodeTask>> {
53 let values = if let Some(list_arr) = array.as_list_opt::<i32>() {
54 let has_garbage_values = if self.keep_original_array {
55 repdef.add_offsets(list_arr.offsets().clone(), array.nulls().cloned())
56 } else {
57 repdef.add_offsets(list_arr.offsets().clone(), deep_copy_nulls(array.nulls()))
59 };
60 if has_garbage_values {
61 list_arr.filter_garbage_nulls().trimmed_values()
62 } else {
63 list_arr.trimmed_values()
64 }
65 } else if let Some(list_arr) = array.as_list_opt::<i64>() {
66 let has_garbage_values = if self.keep_original_array {
67 repdef.add_offsets(list_arr.offsets().clone(), array.nulls().cloned())
68 } else {
69 repdef.add_offsets(list_arr.offsets().clone(), deep_copy_nulls(array.nulls()))
70 };
71 if has_garbage_values {
72 list_arr.filter_garbage_nulls().trimmed_values()
73 } else {
74 list_arr.trimmed_values()
75 }
76 } else {
77 panic!("List encoder used for non-list data")
78 };
79 self.child
80 .maybe_encode(values, external_buffers, repdef, row_number, num_rows)
81 }
82
83 fn flush(&mut self, external_buffers: &mut OutOfLineBuffers) -> Result<Vec<EncodeTask>> {
84 self.child.flush(external_buffers)
85 }
86
87 fn num_columns(&self) -> u32 {
88 self.child.num_columns()
89 }
90
91 fn finish(
92 &mut self,
93 external_buffers: &mut OutOfLineBuffers,
94 ) -> BoxFuture<'_, Result<Vec<crate::encoder::EncodedColumn>>> {
95 self.child.finish(external_buffers)
96 }
97}
98
99#[derive(Debug)]
100pub struct StructuralListScheduler {
101 child: Box<dyn StructuralFieldScheduler>,
102}
103
104impl StructuralListScheduler {
105 pub fn new(child: Box<dyn StructuralFieldScheduler>) -> Self {
106 Self { child }
107 }
108}
109
110impl StructuralFieldScheduler for StructuralListScheduler {
111 fn schedule_ranges<'a>(
112 &'a self,
113 ranges: &[Range<u64>],
114 filter: &FilterExpression,
115 ) -> Result<Box<dyn StructuralSchedulingJob + 'a>> {
116 let child = self.child.schedule_ranges(ranges, filter)?;
117
118 Ok(Box::new(StructuralListSchedulingJob::new(child)))
119 }
120
121 fn initialize<'a>(
122 &'a mut self,
123 filter: &'a FilterExpression,
124 context: &'a SchedulerContext,
125 ) -> BoxFuture<'a, Result<()>> {
126 self.child.initialize(filter, context)
127 }
128}
129
130#[derive(Debug)]
135struct StructuralListSchedulingJob<'a> {
136 child: Box<dyn StructuralSchedulingJob + 'a>,
137}
138
139impl<'a> StructuralListSchedulingJob<'a> {
140 fn new(child: Box<dyn StructuralSchedulingJob + 'a>) -> Self {
141 Self { child }
142 }
143}
144
145impl StructuralSchedulingJob for StructuralListSchedulingJob<'_> {
146 fn schedule_next(
147 &mut self,
148 context: &mut SchedulerContext,
149 ) -> Result<Option<ScheduledScanLine>> {
150 self.child.schedule_next(context)
151 }
152}
153
154#[derive(Debug)]
155pub struct StructuralListDecoder {
156 child: Box<dyn StructuralFieldDecoder>,
157 data_type: DataType,
158}
159
160impl StructuralListDecoder {
161 pub fn new(child: Box<dyn StructuralFieldDecoder>, data_type: DataType) -> Self {
162 Self { child, data_type }
163 }
164}
165
166impl StructuralFieldDecoder for StructuralListDecoder {
167 fn accept_page(&mut self, child: crate::decoder::LoadedPage) -> Result<()> {
168 self.child.accept_page(child)
169 }
170
171 fn drain(&mut self, num_rows: u64) -> Result<Box<dyn StructuralDecodeArrayTask>> {
172 let child_task = self.child.drain(num_rows)?;
173 Ok(Box::new(StructuralListDecodeTask::new(
174 child_task,
175 self.data_type.clone(),
176 )))
177 }
178
179 fn data_type(&self) -> &DataType {
180 &self.data_type
181 }
182}
183
184#[derive(Debug)]
185struct StructuralListDecodeTask {
186 child_task: Box<dyn StructuralDecodeArrayTask>,
187 data_type: DataType,
188}
189
190impl StructuralListDecodeTask {
191 fn new(child_task: Box<dyn StructuralDecodeArrayTask>, data_type: DataType) -> Self {
192 Self {
193 child_task,
194 data_type,
195 }
196 }
197}
198
199impl StructuralDecodeArrayTask for StructuralListDecodeTask {
200 fn decode(self: Box<Self>) -> Result<DecodedArray> {
201 let DecodedArray { array, mut repdef } = self.child_task.decode()?;
202 match &self.data_type {
203 DataType::List(child_field) => {
204 let (offsets, validity) = repdef.unravel_offsets::<i32>()?;
205 let list_array = ListArray::try_new(child_field.clone(), offsets, array, validity)?;
206 Ok(DecodedArray {
207 array: Arc::new(list_array),
208 repdef,
209 })
210 }
211 DataType::LargeList(child_field) => {
212 let (offsets, validity) = repdef.unravel_offsets::<i64>()?;
213 let list_array =
214 LargeListArray::try_new(child_field.clone(), offsets, array, validity)?;
215 Ok(DecodedArray {
216 array: Arc::new(list_array),
217 repdef,
218 })
219 }
220 _ => panic!("List decoder did not have a list field"),
221 }
222 }
223}
224
225#[cfg(test)]
226mod tests {
227
228 use std::{collections::HashMap, sync::Arc};
229
230 use arrow::array::{Int64Builder, LargeListBuilder, StringBuilder};
231 use arrow_array::{
232 builder::{Int32Builder, ListBuilder},
233 Array, ArrayRef, BooleanArray, DictionaryArray, LargeStringArray, ListArray, StructArray,
234 UInt64Array, UInt8Array,
235 };
236 use arrow_buffer::{BooleanBuffer, NullBuffer, OffsetBuffer, ScalarBuffer};
237 use arrow_schema::{DataType, Field, Fields};
238 use lance_core::datatypes::{
239 STRUCTURAL_ENCODING_FULLZIP, STRUCTURAL_ENCODING_META_KEY, STRUCTURAL_ENCODING_MINIBLOCK,
240 };
241 use rstest::rstest;
242
243 use crate::{
244 testing::{check_round_trip_encoding_of_data, check_round_trip_encoding_random, TestCases},
245 version::LanceFileVersion,
246 };
247
248 fn make_list_type(inner_type: DataType) -> DataType {
249 DataType::List(Arc::new(Field::new("item", inner_type, true)))
250 }
251
252 fn make_large_list_type(inner_type: DataType) -> DataType {
253 DataType::LargeList(Arc::new(Field::new("item", inner_type, true)))
254 }
255
256 #[rstest]
257 #[test_log::test(tokio::test)]
258 async fn test_list(
259 #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)] version: LanceFileVersion,
260 #[values(STRUCTURAL_ENCODING_MINIBLOCK, STRUCTURAL_ENCODING_FULLZIP)]
261 structural_encoding: &str,
262 ) {
263 let mut field_metadata = HashMap::new();
264 field_metadata.insert(
265 STRUCTURAL_ENCODING_META_KEY.to_string(),
266 structural_encoding.into(),
267 );
268 let field =
269 Field::new("", make_list_type(DataType::Int32), true).with_metadata(field_metadata);
270 check_round_trip_encoding_random(field, version).await;
271 }
272
273 #[rstest]
274 #[test_log::test(tokio::test)]
275 async fn test_deeply_nested_lists(
276 #[values(STRUCTURAL_ENCODING_MINIBLOCK, STRUCTURAL_ENCODING_FULLZIP)]
277 structural_encoding: &str,
278 ) {
279 let mut field_metadata = HashMap::new();
280 field_metadata.insert(
281 STRUCTURAL_ENCODING_META_KEY.to_string(),
282 structural_encoding.into(),
283 );
284 let field = Field::new("item", DataType::Int32, true).with_metadata(field_metadata);
285 for _ in 0..5 {
286 let field = Field::new("", make_list_type(field.data_type().clone()), true);
287 check_round_trip_encoding_random(field, LanceFileVersion::V2_0).await;
288 }
289 }
290
291 #[test_log::test(tokio::test)]
292 async fn test_large_list() {
293 let field = Field::new("", make_large_list_type(DataType::Int32), true);
294 check_round_trip_encoding_random(field, LanceFileVersion::V2_0).await;
295 }
296
297 #[test_log::test(tokio::test)]
298 async fn test_nested_strings() {
299 let field = Field::new("", make_list_type(DataType::Utf8), true);
300 check_round_trip_encoding_random(field, LanceFileVersion::V2_0).await;
301 }
302
303 #[test_log::test(tokio::test)]
304 async fn test_nested_list() {
305 let field = Field::new("", make_list_type(make_list_type(DataType::Int32)), true);
306 check_round_trip_encoding_random(field, LanceFileVersion::V2_0).await;
307 }
308
309 #[test_log::test(tokio::test)]
310 async fn test_list_struct_list() {
311 let struct_type = DataType::Struct(Fields::from(vec![Field::new(
312 "inner_str",
313 DataType::Utf8,
314 false,
315 )]));
316
317 let field = Field::new("", make_list_type(struct_type), true);
318 check_round_trip_encoding_random(field, LanceFileVersion::V2_0).await;
319 }
320
321 #[test_log::test(tokio::test)]
322 async fn test_list_struct_empty() {
323 let fields = Fields::from(vec![Field::new("inner", DataType::UInt64, true)]);
324 let items = UInt64Array::from(Vec::<u64>::new());
325 let structs = StructArray::new(fields, vec![Arc::new(items)], None);
326 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0; 2 * 1024 * 1024 + 1]));
327 let lists = ListArray::new(
328 Arc::new(Field::new("item", structs.data_type().clone(), true)),
329 offsets,
330 Arc::new(structs),
331 None,
332 );
333
334 check_round_trip_encoding_of_data(
335 vec![Arc::new(lists)],
336 &TestCases::default(),
337 HashMap::new(),
338 )
339 .await;
340 }
341
342 #[rstest]
343 #[test_log::test(tokio::test)]
344 async fn test_simple_list(
345 #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)] version: LanceFileVersion,
346 #[values(STRUCTURAL_ENCODING_MINIBLOCK, STRUCTURAL_ENCODING_FULLZIP)]
347 structural_encoding: &str,
348 ) {
349 let items_builder = Int32Builder::new();
350 let mut list_builder = ListBuilder::new(items_builder);
351 list_builder.append_value([Some(1), Some(2), Some(3)]);
352 list_builder.append_value([Some(4), Some(5)]);
353 list_builder.append_null();
354 list_builder.append_value([Some(6), Some(7), Some(8)]);
355 let list_array = list_builder.finish();
356
357 let mut field_metadata = HashMap::new();
358 field_metadata.insert(
359 STRUCTURAL_ENCODING_META_KEY.to_string(),
360 structural_encoding.into(),
361 );
362
363 let test_cases = TestCases::default()
364 .with_range(0..2)
365 .with_range(0..3)
366 .with_range(1..3)
367 .with_indices(vec![1, 3])
368 .with_indices(vec![2])
369 .with_file_version(version);
370 check_round_trip_encoding_of_data(vec![Arc::new(list_array)], &test_cases, field_metadata)
371 .await;
372 }
373
374 #[rstest]
375 #[test_log::test(tokio::test)]
376 async fn test_simple_nested_list_ends_with_null(
377 #[values(STRUCTURAL_ENCODING_MINIBLOCK, STRUCTURAL_ENCODING_FULLZIP)]
378 structural_encoding: &str,
379 ) {
380 use arrow_array::Int32Array;
381
382 let values = Int32Array::from(vec![1, 2, 3, 4, 5]);
383 let inner_offsets = ScalarBuffer::<i32>::from(vec![0, 1, 2, 3, 4, 5, 5]);
384 let inner_validity = BooleanBuffer::from(vec![true, true, true, true, true, false]);
385 let outer_offsets = ScalarBuffer::<i32>::from(vec![0, 1, 2, 3, 4, 5, 6, 6]);
386 let outer_validity = BooleanBuffer::from(vec![true, true, true, true, true, true, false]);
387
388 let inner_list = ListArray::new(
389 Arc::new(Field::new("item", DataType::Int32, true)),
390 OffsetBuffer::new(inner_offsets),
391 Arc::new(values),
392 Some(NullBuffer::new(inner_validity)),
393 );
394 let outer_list = ListArray::new(
395 Arc::new(Field::new(
396 "item",
397 DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
398 true,
399 )),
400 OffsetBuffer::new(outer_offsets),
401 Arc::new(inner_list),
402 Some(NullBuffer::new(outer_validity)),
403 );
404
405 let mut field_metadata = HashMap::new();
406 field_metadata.insert(
407 STRUCTURAL_ENCODING_META_KEY.to_string(),
408 structural_encoding.into(),
409 );
410
411 let test_cases = TestCases::default()
412 .with_range(0..2)
413 .with_range(0..3)
414 .with_range(5..7)
415 .with_indices(vec![1, 6])
416 .with_indices(vec![6])
417 .with_file_version(LanceFileVersion::V2_1);
418 check_round_trip_encoding_of_data(vec![Arc::new(outer_list)], &test_cases, field_metadata)
419 .await;
420 }
421
422 #[rstest]
423 #[test_log::test(tokio::test)]
424 async fn test_simple_string_list(
425 #[values(STRUCTURAL_ENCODING_MINIBLOCK, STRUCTURAL_ENCODING_FULLZIP)]
426 structural_encoding: &str,
427 ) {
428 let items_builder = StringBuilder::new();
429 let mut list_builder = ListBuilder::new(items_builder);
430 list_builder.append_value([Some("a"), Some("bc"), Some("def")]);
431 list_builder.append_value([Some("gh"), None]);
432 list_builder.append_null();
433 list_builder.append_value([Some("ijk"), Some("lmnop"), Some("qrs")]);
434 let list_array = list_builder.finish();
435
436 let mut field_metadata = HashMap::new();
437 field_metadata.insert(
438 STRUCTURAL_ENCODING_META_KEY.to_string(),
439 structural_encoding.into(),
440 );
441
442 let test_cases = TestCases::default()
443 .with_range(0..2)
444 .with_range(0..3)
445 .with_range(1..3)
446 .with_indices(vec![1, 3])
447 .with_indices(vec![2])
448 .with_file_version(LanceFileVersion::V2_1);
449 check_round_trip_encoding_of_data(vec![Arc::new(list_array)], &test_cases, field_metadata)
450 .await;
451 }
452
453 #[rstest]
454 #[test_log::test(tokio::test)]
455 async fn test_simple_sliced_list(
456 #[values(STRUCTURAL_ENCODING_MINIBLOCK, STRUCTURAL_ENCODING_FULLZIP)]
457 structural_encoding: &str,
458 ) {
459 let items_builder = Int32Builder::new();
460 let mut list_builder = ListBuilder::new(items_builder);
461 list_builder.append_value([Some(1), Some(2), Some(3)]);
462 list_builder.append_value([Some(4), Some(5)]);
463 list_builder.append_null();
464 list_builder.append_value([Some(6), Some(7), Some(8)]);
465 let list_array = list_builder.finish();
466
467 let list_array = list_array.slice(1, 2);
468
469 let mut field_metadata = HashMap::new();
470 field_metadata.insert(
471 STRUCTURAL_ENCODING_META_KEY.to_string(),
472 structural_encoding.into(),
473 );
474
475 let test_cases = TestCases::default()
476 .with_range(0..2)
477 .with_range(1..2)
478 .with_indices(vec![0])
479 .with_indices(vec![1])
480 .with_file_version(LanceFileVersion::V2_1);
481 check_round_trip_encoding_of_data(vec![Arc::new(list_array)], &test_cases, field_metadata)
482 .await;
483 }
484
485 #[test_log::test(tokio::test)]
486 async fn test_simple_list_dict() {
487 let values = LargeStringArray::from_iter_values(["a", "bb", "ccc"]);
488 let indices = UInt8Array::from(vec![0, 1, 2, 0, 1, 2, 0, 1, 2]);
489 let dict_array = DictionaryArray::new(indices, Arc::new(values));
490 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 6, 9]));
491 let list_array = ListArray::new(
492 Arc::new(Field::new("item", dict_array.data_type().clone(), true)),
493 offsets,
494 Arc::new(dict_array),
495 None,
496 );
497
498 let test_cases = TestCases::default()
499 .with_range(0..2)
500 .with_range(1..3)
501 .with_range(2..4)
502 .with_indices(vec![1])
503 .with_indices(vec![2]);
504 check_round_trip_encoding_of_data(
505 vec![Arc::new(list_array)],
506 &test_cases,
507 HashMap::default(),
508 )
509 .await;
510 }
511
512 #[rstest]
513 #[test_log::test(tokio::test)]
514 async fn test_list_with_garbage_nulls(
515 #[values(STRUCTURAL_ENCODING_MINIBLOCK, STRUCTURAL_ENCODING_FULLZIP)]
516 structural_encoding: &str,
517 ) {
518 let items = UInt64Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
521 let offsets = ScalarBuffer::<i32>::from(vec![0, 5, 8, 10]);
522 let offsets = OffsetBuffer::new(offsets);
523 let list_validity = NullBuffer::new(BooleanBuffer::from(vec![true, false, true]));
524 let list_arr = ListArray::new(
525 Arc::new(Field::new("item", DataType::UInt64, true)),
526 offsets,
527 Arc::new(items),
528 Some(list_validity),
529 );
530
531 let mut field_metadata = HashMap::new();
532 field_metadata.insert(
533 STRUCTURAL_ENCODING_META_KEY.to_string(),
534 structural_encoding.into(),
535 );
536
537 let test_cases = TestCases::default()
538 .with_range(0..3)
539 .with_range(1..2)
540 .with_indices(vec![1])
541 .with_indices(vec![2])
542 .with_file_version(LanceFileVersion::V2_1);
543 check_round_trip_encoding_of_data(vec![Arc::new(list_arr)], &test_cases, field_metadata)
544 .await;
545 }
546
547 #[rstest]
548 #[test_log::test(tokio::test)]
549 async fn test_simple_two_page_list(
550 #[values(STRUCTURAL_ENCODING_MINIBLOCK, STRUCTURAL_ENCODING_FULLZIP)]
551 structural_encoding: &str,
552 ) {
553 let items_builder = Int64Builder::new();
556 let mut list_builder = ListBuilder::new(items_builder);
557 for i in 0..512 {
558 list_builder.append_value([Some(i), Some(i * 2)]);
559 }
560 let list_array_1 = list_builder.finish();
561
562 let items_builder = Int64Builder::new();
563 let mut list_builder = ListBuilder::new(items_builder);
564 for i in 0..512 {
565 let i = i + 512;
566 list_builder.append_value([Some(i), Some(i * 2)]);
567 }
568 let list_array_2 = list_builder.finish();
569
570 let mut metadata = HashMap::new();
571 metadata.insert(
572 STRUCTURAL_ENCODING_META_KEY.to_string(),
573 structural_encoding.into(),
574 );
575
576 let test_cases = TestCases::default()
577 .with_file_version(LanceFileVersion::V2_1)
578 .with_page_sizes(vec![100])
579 .with_range(800..900);
580 check_round_trip_encoding_of_data(
581 vec![Arc::new(list_array_1), Arc::new(list_array_2)],
582 &test_cases,
583 metadata,
584 )
585 .await;
586 }
587
588 #[test_log::test(tokio::test)]
589 async fn test_simple_large_list() {
590 let items_builder = Int32Builder::new();
591 let mut list_builder = LargeListBuilder::new(items_builder);
592 list_builder.append_value([Some(1), Some(2), Some(3)]);
593 list_builder.append_value([Some(4), Some(5)]);
594 list_builder.append_null();
595 list_builder.append_value([Some(6), Some(7), Some(8)]);
596 let list_array = list_builder.finish();
597
598 let test_cases = TestCases::default()
599 .with_range(0..2)
600 .with_range(0..3)
601 .with_range(1..3)
602 .with_indices(vec![1, 3]);
603 check_round_trip_encoding_of_data(vec![Arc::new(list_array)], &test_cases, HashMap::new())
604 .await;
605 }
606
607 #[rstest]
608 #[test_log::test(tokio::test)]
609 async fn test_empty_lists(
610 #[values(LanceFileVersion::V2_0, LanceFileVersion::V2_1)] version: LanceFileVersion,
611 #[values(STRUCTURAL_ENCODING_MINIBLOCK, STRUCTURAL_ENCODING_FULLZIP)]
612 structural_encoding: &str,
613 ) {
614 let mut field_metadata = HashMap::new();
615 field_metadata.insert(
616 STRUCTURAL_ENCODING_META_KEY.to_string(),
617 structural_encoding.into(),
618 );
619
620 let values = [vec![Some(1), Some(2), Some(3)], vec![], vec![None]];
623 for order in [[0, 1, 2], [1, 0, 2], [2, 0, 1]] {
625 let items_builder = Int32Builder::new();
626 let mut list_builder = ListBuilder::new(items_builder);
627 for idx in order {
628 list_builder.append_value(values[idx].clone());
629 }
630 let list_array = Arc::new(list_builder.finish());
631 let test_cases = TestCases::default()
632 .with_indices(vec![1])
633 .with_indices(vec![0])
634 .with_indices(vec![2])
635 .with_indices(vec![0, 1])
636 .with_file_version(version);
637 check_round_trip_encoding_of_data(
638 vec![list_array.clone()],
639 &test_cases,
640 field_metadata.clone(),
641 )
642 .await;
643 let test_cases = test_cases.with_batch_size(1);
644 check_round_trip_encoding_of_data(
645 vec![list_array],
646 &test_cases,
647 field_metadata.clone(),
648 )
649 .await;
650 }
651
652 let items_builder = Int32Builder::new();
657 let mut list_builder = ListBuilder::new(items_builder);
658 list_builder.append(true);
659 list_builder.append_null();
660 list_builder.append(true);
661 let list_array = Arc::new(list_builder.finish());
662
663 let test_cases = TestCases::default()
664 .with_range(0..2)
665 .with_indices(vec![1])
666 .with_file_version(version);
667 check_round_trip_encoding_of_data(
668 vec![list_array.clone()],
669 &test_cases,
670 field_metadata.clone(),
671 )
672 .await;
673 let test_cases = test_cases.with_batch_size(1);
674 check_round_trip_encoding_of_data(vec![list_array], &test_cases, field_metadata.clone())
675 .await;
676
677 let items_builder = StringBuilder::new();
682 let mut list_builder = ListBuilder::new(items_builder);
683 list_builder.append(true);
684 list_builder.append_null();
685 list_builder.append(true);
686 let list_array = Arc::new(list_builder.finish());
687
688 let test_cases = TestCases::default()
689 .with_range(0..2)
690 .with_indices(vec![1])
691 .with_file_version(version);
692 check_round_trip_encoding_of_data(
693 vec![list_array.clone()],
694 &test_cases,
695 field_metadata.clone(),
696 )
697 .await;
698 let test_cases = test_cases.with_batch_size(1);
699 check_round_trip_encoding_of_data(vec![list_array], &test_cases, field_metadata.clone())
700 .await;
701
702 let items_builder = Int32Builder::new();
705 let mut list_builder = ListBuilder::new(items_builder);
706 list_builder.append_null();
707 list_builder.append_null();
708 list_builder.append_null();
709 let list_array = Arc::new(list_builder.finish());
710
711 let test_cases = TestCases::default()
712 .with_range(0..2)
713 .with_indices(vec![1])
714 .with_file_version(version);
715 check_round_trip_encoding_of_data(
716 vec![list_array.clone()],
717 &test_cases,
718 field_metadata.clone(),
719 )
720 .await;
721 let test_cases = test_cases.with_batch_size(1);
722 check_round_trip_encoding_of_data(vec![list_array], &test_cases, field_metadata.clone())
723 .await;
724
725 if version < LanceFileVersion::V2_1 {
726 return;
727 }
728
729 let items_builder = Int32Builder::new();
732 let mut list_builder = ListBuilder::new(items_builder);
733 list_builder.append_null();
734 list_builder.append_null();
735 list_builder.append_null();
736 let list_array = Arc::new(list_builder.finish());
737
738 let struct_validity = NullBuffer::new(BooleanBuffer::from(vec![true, false, true]));
739 let struct_array = Arc::new(StructArray::new(
740 Fields::from(vec![Field::new(
741 "lists",
742 list_array.data_type().clone(),
743 true,
744 )]),
745 vec![list_array],
746 Some(struct_validity),
747 ));
748
749 let test_cases = TestCases::default()
750 .with_range(0..2)
751 .with_indices(vec![1])
752 .with_file_version(version);
753 check_round_trip_encoding_of_data(
754 vec![struct_array.clone()],
755 &test_cases,
756 field_metadata.clone(),
757 )
758 .await;
759 let test_cases = test_cases.with_batch_size(1);
760 check_round_trip_encoding_of_data(vec![struct_array], &test_cases, field_metadata.clone())
761 .await;
762 }
763
764 #[test_log::test(tokio::test)]
765 #[ignore] async fn test_jumbo_list() {
767 let items = BooleanArray::new_null(1024 * 1024);
771 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 1024 * 1024]));
772 let list_arr = Arc::new(ListArray::new(
773 Arc::new(Field::new("item", DataType::Boolean, true)),
774 offsets,
775 Arc::new(items),
776 None,
777 )) as ArrayRef;
778 let arrs = vec![list_arr; 5000];
779
780 let test_cases = TestCases::default().without_validation();
782 check_round_trip_encoding_of_data(arrs, &test_cases, HashMap::new()).await;
783 }
784}