1use crate::array::{get_offsets, print_long_array};
19use crate::iterator::MapArrayIter;
20use crate::{Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray, make_array};
21use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24use std::any::Any;
25use std::sync::Arc;
26
27#[derive(Clone)]
36pub struct MapArray {
37 data_type: DataType,
38 nulls: Option<NullBuffer>,
39 entries: StructArray,
41 value_offsets: OffsetBuffer<i32>,
43}
44
45impl MapArray {
46 pub fn try_new(
62 field: FieldRef,
63 offsets: OffsetBuffer<i32>,
64 entries: StructArray,
65 nulls: Option<NullBuffer>,
66 ordered: bool,
67 ) -> Result<Self, ArrowError> {
68 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
70 if end_offset > entries.len() {
73 return Err(ArrowError::InvalidArgumentError(format!(
74 "Max offset of {end_offset} exceeds length of entries {}",
75 entries.len()
76 )));
77 }
78
79 if let Some(n) = nulls.as_ref() {
80 if n.len() != len {
81 return Err(ArrowError::InvalidArgumentError(format!(
82 "Incorrect length of null buffer for MapArray, expected {len} got {}",
83 n.len(),
84 )));
85 }
86 }
87 if field.is_nullable() || entries.null_count() != 0 {
88 return Err(ArrowError::InvalidArgumentError(
89 "MapArray entries cannot contain nulls".to_string(),
90 ));
91 }
92
93 if field.data_type() != entries.data_type() {
94 return Err(ArrowError::InvalidArgumentError(format!(
95 "MapArray expected data type {} got {} for {:?}",
96 field.data_type(),
97 entries.data_type(),
98 field.name()
99 )));
100 }
101
102 if entries.columns().len() != 2 {
103 return Err(ArrowError::InvalidArgumentError(format!(
104 "MapArray entries must contain two children, got {}",
105 entries.columns().len()
106 )));
107 }
108
109 Ok(Self {
110 data_type: DataType::Map(field, ordered),
111 nulls,
112 entries,
113 value_offsets: offsets,
114 })
115 }
116
117 pub fn new(
126 field: FieldRef,
127 offsets: OffsetBuffer<i32>,
128 entries: StructArray,
129 nulls: Option<NullBuffer>,
130 ordered: bool,
131 ) -> Self {
132 Self::try_new(field, offsets, entries, nulls, ordered).unwrap()
133 }
134
135 pub fn into_parts(
137 self,
138 ) -> (
139 FieldRef,
140 OffsetBuffer<i32>,
141 StructArray,
142 Option<NullBuffer>,
143 bool,
144 ) {
145 let (f, ordered) = match self.data_type {
146 DataType::Map(f, ordered) => (f, ordered),
147 _ => unreachable!(),
148 };
149 (f, self.value_offsets, self.entries, self.nulls, ordered)
150 }
151
152 #[inline]
157 pub fn offsets(&self) -> &OffsetBuffer<i32> {
158 &self.value_offsets
159 }
160
161 pub fn keys(&self) -> &ArrayRef {
163 self.entries.column(0)
164 }
165
166 pub fn values(&self) -> &ArrayRef {
168 self.entries.column(1)
169 }
170
171 pub fn entries(&self) -> &StructArray {
173 &self.entries
174 }
175
176 pub fn entries_fields(&self) -> (&Field, &Field) {
178 let fields = self.entries.fields().iter().collect::<Vec<_>>();
179 let fields = TryInto::<[&FieldRef; 2]>::try_into(fields)
180 .expect("Every map has a key and value field");
181
182 (fields[0].as_ref(), fields[1].as_ref())
183 }
184
185 pub fn key_type(&self) -> &DataType {
187 self.keys().data_type()
188 }
189
190 pub fn value_type(&self) -> &DataType {
192 self.values().data_type()
193 }
194
195 pub unsafe fn value_unchecked(&self, i: usize) -> StructArray {
203 let end = *unsafe { self.value_offsets().get_unchecked(i + 1) };
204 let start = *unsafe { self.value_offsets().get_unchecked(i) };
205 self.entries
206 .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap())
207 }
208
209 pub fn value(&self, i: usize) -> StructArray {
219 let end = self.value_offsets()[i + 1] as usize;
220 let start = self.value_offsets()[i] as usize;
221 self.entries.slice(start, end - start)
222 }
223
224 #[inline]
226 pub fn value_offsets(&self) -> &[i32] {
227 &self.value_offsets
228 }
229
230 #[inline]
232 pub fn value_length(&self, i: usize) -> i32 {
233 let offsets = self.value_offsets();
234 offsets[i + 1] - offsets[i]
235 }
236
237 pub fn slice(&self, offset: usize, length: usize) -> Self {
239 Self {
240 data_type: self.data_type.clone(),
241 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
242 entries: self.entries.clone(),
243 value_offsets: self.value_offsets.slice(offset, length),
244 }
245 }
246
247 pub fn iter(&self) -> MapArrayIter<'_> {
249 MapArrayIter::new(self)
250 }
251}
252
253impl From<ArrayData> for MapArray {
254 fn from(data: ArrayData) -> Self {
255 Self::try_new_from_array_data(data)
256 .expect("Expected infallible creation of MapArray from ArrayData failed")
257 }
258}
259
260impl From<MapArray> for ArrayData {
261 fn from(array: MapArray) -> Self {
262 let len = array.len();
263 let builder = ArrayDataBuilder::new(array.data_type)
264 .len(len)
265 .nulls(array.nulls)
266 .buffers(vec![array.value_offsets.into_inner().into_inner()])
267 .child_data(vec![array.entries.to_data()]);
268
269 unsafe { builder.build_unchecked() }
270 }
271}
272
273impl MapArray {
274 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
275 if !matches!(data.data_type(), DataType::Map(_, _)) {
276 return Err(ArrowError::InvalidArgumentError(format!(
277 "MapArray expected ArrayData with DataType::Map got {}",
278 data.data_type()
279 )));
280 }
281
282 if data.buffers().len() != 1 {
283 return Err(ArrowError::InvalidArgumentError(format!(
284 "MapArray data should contain a single buffer only (value offsets), had {}",
285 data.len()
286 )));
287 }
288
289 if data.child_data().len() != 1 {
290 return Err(ArrowError::InvalidArgumentError(format!(
291 "MapArray should contain a single child array (values array), had {}",
292 data.child_data().len()
293 )));
294 }
295
296 let entries = data.child_data()[0].clone();
297
298 if let DataType::Struct(fields) = entries.data_type() {
299 if fields.len() != 2 {
300 return Err(ArrowError::InvalidArgumentError(format!(
301 "MapArray should contain a struct array with 2 fields, have {} fields",
302 fields.len()
303 )));
304 }
305 } else {
306 return Err(ArrowError::InvalidArgumentError(format!(
307 "MapArray should contain a struct array child, found {:?}",
308 entries.data_type()
309 )));
310 }
311 let entries = entries.into();
312
313 let value_offsets = unsafe { get_offsets(&data) };
316
317 Ok(Self {
318 data_type: data.data_type().clone(),
319 nulls: data.nulls().cloned(),
320 entries,
321 value_offsets,
322 })
323 }
324
325 pub fn new_from_strings<'a>(
327 keys: impl Iterator<Item = &'a str>,
328 values: &dyn Array,
329 entry_offsets: &[u32],
330 ) -> Result<Self, ArrowError> {
331 let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
332 let keys_data = StringArray::from_iter_values(keys);
333
334 let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
335 let values_field = Arc::new(Field::new(
336 "values",
337 values.data_type().clone(),
338 values.null_count() > 0,
339 ));
340
341 let entry_struct = StructArray::from(vec![
342 (keys_field, Arc::new(keys_data) as ArrayRef),
343 (values_field, make_array(values.to_data())),
344 ]);
345
346 let map_data_type = DataType::Map(
347 Arc::new(Field::new(
348 "entries",
349 entry_struct.data_type().clone(),
350 false,
351 )),
352 false,
353 );
354 let map_data = ArrayData::builder(map_data_type)
355 .len(entry_offsets.len() - 1)
356 .add_buffer(entry_offsets_buffer)
357 .add_child_data(entry_struct.into_data())
358 .build()?;
359
360 Ok(MapArray::from(map_data))
361 }
362}
363
364impl super::private::Sealed for MapArray {}
365
366impl Array for MapArray {
367 fn as_any(&self) -> &dyn Any {
368 self
369 }
370
371 fn to_data(&self) -> ArrayData {
372 self.clone().into_data()
373 }
374
375 fn into_data(self) -> ArrayData {
376 self.into()
377 }
378
379 fn data_type(&self) -> &DataType {
380 &self.data_type
381 }
382
383 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
384 Arc::new(self.slice(offset, length))
385 }
386
387 fn len(&self) -> usize {
388 self.value_offsets.len() - 1
389 }
390
391 fn is_empty(&self) -> bool {
392 self.value_offsets.len() <= 1
393 }
394
395 fn shrink_to_fit(&mut self) {
396 if let Some(nulls) = &mut self.nulls {
397 nulls.shrink_to_fit();
398 }
399 self.entries.shrink_to_fit();
400 self.value_offsets.shrink_to_fit();
401 }
402
403 fn offset(&self) -> usize {
404 0
405 }
406
407 fn nulls(&self) -> Option<&NullBuffer> {
408 self.nulls.as_ref()
409 }
410
411 fn logical_null_count(&self) -> usize {
412 self.null_count()
414 }
415
416 fn get_buffer_memory_size(&self) -> usize {
417 let mut size = self.entries.get_buffer_memory_size();
418 size += self.value_offsets.inner().inner().capacity();
419 if let Some(n) = self.nulls.as_ref() {
420 size += n.buffer().capacity();
421 }
422 size
423 }
424
425 fn get_array_memory_size(&self) -> usize {
426 let mut size = std::mem::size_of::<Self>() + self.entries.get_array_memory_size();
427 size += self.value_offsets.inner().inner().capacity();
428 if let Some(n) = self.nulls.as_ref() {
429 size += n.buffer().capacity();
430 }
431 size
432 }
433}
434
435impl ArrayAccessor for &MapArray {
436 type Item = StructArray;
437
438 fn value(&self, index: usize) -> Self::Item {
439 MapArray::value(self, index)
440 }
441
442 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
443 MapArray::value(self, index)
444 }
445}
446
447impl std::fmt::Debug for MapArray {
448 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
449 write!(f, "MapArray\n[\n")?;
450 print_long_array(self, f, |array, index, f| {
451 std::fmt::Debug::fmt(&array.value(index), f)
452 })?;
453 write!(f, "]")
454 }
455}
456
457impl From<MapArray> for ListArray {
458 fn from(value: MapArray) -> Self {
459 let field = match value.data_type() {
460 DataType::Map(field, _) => field,
461 _ => unreachable!("This should be a map type."),
462 };
463 let data_type = DataType::List(field.clone());
464 let builder = value.into_data().into_builder().data_type(data_type);
465 let array_data = unsafe { builder.build_unchecked() };
466
467 ListArray::from(array_data)
468 }
469}
470
#[cfg(test)]
mod tests {
    use crate::cast::AsArray;
    use crate::types::UInt32Type;
    use crate::{Int32Array, UInt32Array};
    use arrow_schema::Fields;

    use super::*;

    /// Wraps the struct type of `entries` in a non-nullable field named
    /// "entries" and returns the matching unordered map type.
    fn unordered_map_type(entries: &StructArray) -> DataType {
        let entries_field = Field::new("entries", entries.data_type().clone(), false);
        DataType::Map(Arc::new(entries_field), false)
    }

    /// Assembles validated map `ArrayData` with `len` slots from an offsets
    /// buffer and the entries struct.
    fn build_map_data(len: usize, offsets: Buffer, entries: StructArray) -> ArrayData {
        ArrayData::builder(unordered_map_type(&entries))
            .len(len)
            .add_buffer(offsets)
            .add_child_data(entries.into_data())
            .build()
            .unwrap()
    }

    /// Builds a three-slot map of `Int32` keys to `UInt32` values from raw buffers.
    fn create_from_buffers() -> MapArray {
        // Eight keys and eight values, without nulls.
        let key_column = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let value_column = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(Buffer::from(
                [0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
            ))
            .build()
            .unwrap();

        // Slots cover rows [0, 3), [3, 6) and [6, 8).
        let offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

        let key_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let value_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let entries = StructArray::from(vec![
            (key_field, make_array(key_column)),
            (value_field, make_array(value_column)),
        ]);

        MapArray::from(build_map_data(3, offsets, entries))
    }

    /// Checks the accessors of a map with nullable values, before and after slicing.
    #[test]
    fn test_map_array() {
        let key_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let value_data = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(Buffer::from(
                [0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
            ))
            .null_bit_buffer(Some(Buffer::from(&[0b11010110])))
            .build()
            .unwrap();

        // Slots cover rows [0, 3), [3, 6) and [6, 8).
        let offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
        let entries = StructArray::from(vec![
            (keys_field.clone(), make_array(key_data)),
            (values_field.clone(), make_array(value_data.clone())),
        ]);

        let map_array = MapArray::from(build_map_data(3, offsets, entries));

        assert_eq!(value_data, map_array.values().to_data());
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        let first_keys = Arc::new(Int32Array::from(vec![0, 1, 2])) as ArrayRef;
        let first_values =
            Arc::new(UInt32Array::from(vec![None, Some(10u32), Some(20)])) as ArrayRef;
        let first_slot = StructArray::from(vec![
            (keys_field.clone(), first_keys),
            (values_field.clone(), first_values),
        ]);
        assert_eq!(
            first_slot,
            StructArray::from(map_array.value(0).into_data())
        );
        assert_eq!(
            &first_slot,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        for slot in 0..3 {
            assert!(map_array.is_valid(slot));
            assert!(!map_array.is_null(slot));
        }

        // Drop the first slot and repeat the checks on the remaining two.
        let map_array = map_array.slice(1, 2);

        assert_eq!(value_data, map_array.values().to_data());
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(2, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[1]);
        assert_eq!(2, map_array.value_length(1));

        let sliced_keys = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef;
        let sliced_values = Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
        let sliced_slot = StructArray::from(vec![
            (keys_field, sliced_keys),
            (values_field, sliced_values),
        ]);
        assert_eq!(
            &sliced_slot,
            map_array
                .value(0)
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        assert_eq!(
            &sliced_slot,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
    }

    /// Compares a sliced map against an equivalent map built from scratch.
    #[test]
    #[ignore = "Test fails because slice of <list<struct>> is still buggy"]
    fn test_map_array_slice() {
        let map_array = create_from_buffers();

        let sliced_array = map_array.slice(1, 2);
        assert_eq!(2, sliced_array.len());
        assert_eq!(1, sliced_array.offset());
        let sliced_array_data = sliced_array.to_data();
        for child in sliced_array_data.child_data() {
            assert_eq!(child.offset(), 1);
        }

        let sliced_map_array = sliced_array.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(3, sliced_map_array.value_offsets()[0]);
        assert_eq!(3, sliced_map_array.value_length(0));
        assert_eq!(6, sliced_map_array.value_offsets()[1]);
        assert_eq!(2, sliced_map_array.value_length(1));

        // Build the expected result directly from the last five rows.
        let expected_keys = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(Buffer::from([3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let expected_values = ArrayData::builder(DataType::UInt32)
            .len(5)
            .add_buffer(Buffer::from([30u32, 40, 50, 60, 70].to_byte_slice()))
            .build()
            .unwrap();

        // Slots cover rows [0, 3) and [3, 5).
        let expected_offsets = Buffer::from([0, 3, 5].to_byte_slice());

        let key_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let value_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let expected_entries = StructArray::from(vec![
            (key_field, make_array(expected_keys)),
            (value_field, make_array(expected_values)),
        ]);

        let expected_map_array =
            MapArray::from(build_map_data(2, expected_offsets, expected_entries));

        assert_eq!(&expected_map_array, sliced_map_array);
    }

    /// Reading one slot past the end must panic.
    #[test]
    #[should_panic(expected = "index out of bounds: the len is ")]
    fn test_map_array_index_out_of_bound() {
        let map_array = create_from_buffers();

        let past_the_end = map_array.len();
        map_array.value(past_the_end);
    }

    /// Converting non-map `ArrayData` must panic with the data type error.
    #[test]
    #[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")]
    fn test_from_array_data_validation() {
        // A dictionary of structs is not a map, so the conversion is rejected.
        let struct_fields = Fields::from(vec![
            Field::new("keys", DataType::Int32, true),
            Field::new("values", DataType::UInt32, true),
        ]);
        let struct_t = DataType::Struct(struct_fields);
        let dict_t = DataType::Dictionary(Box::new(DataType::Int32), Box::new(struct_t));
        let _ = MapArray::from(ArrayData::new_empty(&dict_t));
    }

    /// Checks the accessors of a map built with `new_from_strings`.
    #[test]
    fn test_new_from_strings() {
        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

        // Slots cover rows [0, 3), [3, 6) and [6, 8).
        let entry_offsets = [0, 3, 6, 8];

        let map_array =
            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
                .unwrap();

        assert_eq!(
            &values_data,
            map_array.values().as_primitive::<UInt32Type>()
        );
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        let first_keys = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
        let first_values = Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as ArrayRef;
        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let first_slot = StructArray::from(vec![
            (keys_field, first_keys),
            (values_field, first_values),
        ]);
        assert_eq!(
            first_slot,
            StructArray::from(map_array.value(0).into_data())
        );
        assert_eq!(
            &first_slot,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        for slot in 0..3 {
            assert!(map_array.is_valid(slot));
            assert!(!map_array.is_null(slot));
        }
    }

    /// Exercises the success path and each validation error of `try_new`.
    #[test]
    fn test_try_new() {
        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
        let fields = Fields::from(vec![
            Field::new("key", DataType::Int32, false),
            Field::new("values", DataType::Int32, false),
        ]);
        let columns = vec![
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
        ];

        let entries = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));

        // Valid without a null buffer.
        MapArray::new(field.clone(), offsets.clone(), entries.clone(), None, false);

        // Valid with a null buffer of matching length.
        let nulls = NullBuffer::new_null(3);
        MapArray::new(field.clone(), offsets, entries.clone(), Some(nulls), false);

        // Four slots but only three validity bits.
        let nulls = NullBuffer::new_null(3);
        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
        let err = MapArray::try_new(
            field.clone(),
            offsets.clone(),
            entries.clone(),
            Some(nulls),
            false,
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
        );

        // The last offset points past the truncated entries.
        let err = MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
            .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Max offset of 5 exceeds length of entries 2"
        );

        // The field type does not match the entries type.
        let field = Arc::new(Field::new("element", DataType::Int64, false));
        let err = MapArray::try_new(field, offsets.clone(), entries, None, false)
            .unwrap_err()
            .to_string();

        assert!(
            err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"),
            "{err}"
        );

        // Entries with three columns instead of two.
        let fields = Fields::from(vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Int32, false),
            Field::new("c", DataType::Int32, false),
        ]);
        let columns = vec![
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
        ];

        let three_columns = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
        let err = MapArray::try_new(field, offsets, three_columns, None, false).unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: MapArray entries must contain two children, got 3"
        );
    }
}