1use crate::array::{get_offsets, print_long_array};
19use crate::iterator::MapArrayIter;
20use crate::{make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray};
21use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24use std::any::Any;
25use std::sync::Arc;
26
/// An array in which every slot holds a run of key/value entries.
///
/// The entries of all slots are stored together in a single [`StructArray`];
/// `value_offsets` records where each slot's run starts and ends within it.
#[derive(Clone)]
pub struct MapArray {
    /// The array's [`DataType`]; `into_parts` expects it to be `DataType::Map`.
    data_type: DataType,
    /// Optional validity buffer; `try_new` requires one entry per map slot.
    nulls: Option<NullBuffer>,
    /// Child struct array holding the entries: column 0 is read as the keys,
    /// column 1 as the values.
    entries: StructArray,
    /// Offsets into `entries`; slot `i` covers
    /// `value_offsets[i]..value_offsets[i + 1]`.
    value_offsets: OffsetBuffer<i32>,
}
44
45impl MapArray {
46 pub fn try_new(
62 field: FieldRef,
63 offsets: OffsetBuffer<i32>,
64 entries: StructArray,
65 nulls: Option<NullBuffer>,
66 ordered: bool,
67 ) -> Result<Self, ArrowError> {
68 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
70 if end_offset > entries.len() {
73 return Err(ArrowError::InvalidArgumentError(format!(
74 "Max offset of {end_offset} exceeds length of entries {}",
75 entries.len()
76 )));
77 }
78
79 if let Some(n) = nulls.as_ref() {
80 if n.len() != len {
81 return Err(ArrowError::InvalidArgumentError(format!(
82 "Incorrect length of null buffer for MapArray, expected {len} got {}",
83 n.len(),
84 )));
85 }
86 }
87 if field.is_nullable() || entries.null_count() != 0 {
88 return Err(ArrowError::InvalidArgumentError(
89 "MapArray entries cannot contain nulls".to_string(),
90 ));
91 }
92
93 if field.data_type() != entries.data_type() {
94 return Err(ArrowError::InvalidArgumentError(format!(
95 "MapArray expected data type {} got {} for {:?}",
96 field.data_type(),
97 entries.data_type(),
98 field.name()
99 )));
100 }
101
102 if entries.columns().len() != 2 {
103 return Err(ArrowError::InvalidArgumentError(format!(
104 "MapArray entries must contain two children, got {}",
105 entries.columns().len()
106 )));
107 }
108
109 Ok(Self {
110 data_type: DataType::Map(field, ordered),
111 nulls,
112 entries,
113 value_offsets: offsets,
114 })
115 }
116
117 pub fn new(
126 field: FieldRef,
127 offsets: OffsetBuffer<i32>,
128 entries: StructArray,
129 nulls: Option<NullBuffer>,
130 ordered: bool,
131 ) -> Self {
132 Self::try_new(field, offsets, entries, nulls, ordered).unwrap()
133 }
134
135 pub fn into_parts(
137 self,
138 ) -> (
139 FieldRef,
140 OffsetBuffer<i32>,
141 StructArray,
142 Option<NullBuffer>,
143 bool,
144 ) {
145 let (f, ordered) = match self.data_type {
146 DataType::Map(f, ordered) => (f, ordered),
147 _ => unreachable!(),
148 };
149 (f, self.value_offsets, self.entries, self.nulls, ordered)
150 }
151
152 #[inline]
157 pub fn offsets(&self) -> &OffsetBuffer<i32> {
158 &self.value_offsets
159 }
160
161 pub fn keys(&self) -> &ArrayRef {
163 self.entries.column(0)
164 }
165
166 pub fn values(&self) -> &ArrayRef {
168 self.entries.column(1)
169 }
170
171 pub fn entries(&self) -> &StructArray {
173 &self.entries
174 }
175
176 pub fn key_type(&self) -> &DataType {
178 self.keys().data_type()
179 }
180
181 pub fn value_type(&self) -> &DataType {
183 self.values().data_type()
184 }
185
186 pub unsafe fn value_unchecked(&self, i: usize) -> StructArray {
194 let end = *self.value_offsets().get_unchecked(i + 1);
195 let start = *self.value_offsets().get_unchecked(i);
196 self.entries
197 .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap())
198 }
199
200 pub fn value(&self, i: usize) -> StructArray {
210 let end = self.value_offsets()[i + 1] as usize;
211 let start = self.value_offsets()[i] as usize;
212 self.entries.slice(start, end - start)
213 }
214
215 #[inline]
217 pub fn value_offsets(&self) -> &[i32] {
218 &self.value_offsets
219 }
220
221 #[inline]
223 pub fn value_length(&self, i: usize) -> i32 {
224 let offsets = self.value_offsets();
225 offsets[i + 1] - offsets[i]
226 }
227
228 pub fn slice(&self, offset: usize, length: usize) -> Self {
230 Self {
231 data_type: self.data_type.clone(),
232 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
233 entries: self.entries.clone(),
234 value_offsets: self.value_offsets.slice(offset, length),
235 }
236 }
237
238 pub fn iter(&self) -> MapArrayIter<'_> {
240 MapArrayIter::new(self)
241 }
242}
243
244impl From<ArrayData> for MapArray {
245 fn from(data: ArrayData) -> Self {
246 Self::try_new_from_array_data(data)
247 .expect("Expected infallible creation of MapArray from ArrayData failed")
248 }
249}
250
251impl From<MapArray> for ArrayData {
252 fn from(array: MapArray) -> Self {
253 let len = array.len();
254 let builder = ArrayDataBuilder::new(array.data_type)
255 .len(len)
256 .nulls(array.nulls)
257 .buffers(vec![array.value_offsets.into_inner().into_inner()])
258 .child_data(vec![array.entries.to_data()]);
259
260 unsafe { builder.build_unchecked() }
261 }
262}
263
264impl MapArray {
265 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
266 if !matches!(data.data_type(), DataType::Map(_, _)) {
267 return Err(ArrowError::InvalidArgumentError(format!(
268 "MapArray expected ArrayData with DataType::Map got {}",
269 data.data_type()
270 )));
271 }
272
273 if data.buffers().len() != 1 {
274 return Err(ArrowError::InvalidArgumentError(format!(
275 "MapArray data should contain a single buffer only (value offsets), had {}",
276 data.len()
277 )));
278 }
279
280 if data.child_data().len() != 1 {
281 return Err(ArrowError::InvalidArgumentError(format!(
282 "MapArray should contain a single child array (values array), had {}",
283 data.child_data().len()
284 )));
285 }
286
287 let entries = data.child_data()[0].clone();
288
289 if let DataType::Struct(fields) = entries.data_type() {
290 if fields.len() != 2 {
291 return Err(ArrowError::InvalidArgumentError(format!(
292 "MapArray should contain a struct array with 2 fields, have {} fields",
293 fields.len()
294 )));
295 }
296 } else {
297 return Err(ArrowError::InvalidArgumentError(format!(
298 "MapArray should contain a struct array child, found {:?}",
299 entries.data_type()
300 )));
301 }
302 let entries = entries.into();
303
304 let value_offsets = unsafe { get_offsets(&data) };
307
308 Ok(Self {
309 data_type: data.data_type().clone(),
310 nulls: data.nulls().cloned(),
311 entries,
312 value_offsets,
313 })
314 }
315
316 pub fn new_from_strings<'a>(
318 keys: impl Iterator<Item = &'a str>,
319 values: &dyn Array,
320 entry_offsets: &[u32],
321 ) -> Result<Self, ArrowError> {
322 let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
323 let keys_data = StringArray::from_iter_values(keys);
324
325 let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
326 let values_field = Arc::new(Field::new(
327 "values",
328 values.data_type().clone(),
329 values.null_count() > 0,
330 ));
331
332 let entry_struct = StructArray::from(vec![
333 (keys_field, Arc::new(keys_data) as ArrayRef),
334 (values_field, make_array(values.to_data())),
335 ]);
336
337 let map_data_type = DataType::Map(
338 Arc::new(Field::new(
339 "entries",
340 entry_struct.data_type().clone(),
341 false,
342 )),
343 false,
344 );
345 let map_data = ArrayData::builder(map_data_type)
346 .len(entry_offsets.len() - 1)
347 .add_buffer(entry_offsets_buffer)
348 .add_child_data(entry_struct.into_data())
349 .build()?;
350
351 Ok(MapArray::from(map_data))
352 }
353}
354
355impl Array for MapArray {
356 fn as_any(&self) -> &dyn Any {
357 self
358 }
359
360 fn to_data(&self) -> ArrayData {
361 self.clone().into_data()
362 }
363
364 fn into_data(self) -> ArrayData {
365 self.into()
366 }
367
368 fn data_type(&self) -> &DataType {
369 &self.data_type
370 }
371
372 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
373 Arc::new(self.slice(offset, length))
374 }
375
376 fn len(&self) -> usize {
377 self.value_offsets.len() - 1
378 }
379
380 fn is_empty(&self) -> bool {
381 self.value_offsets.len() <= 1
382 }
383
384 fn shrink_to_fit(&mut self) {
385 if let Some(nulls) = &mut self.nulls {
386 nulls.shrink_to_fit();
387 }
388 self.entries.shrink_to_fit();
389 self.value_offsets.shrink_to_fit();
390 }
391
392 fn offset(&self) -> usize {
393 0
394 }
395
396 fn nulls(&self) -> Option<&NullBuffer> {
397 self.nulls.as_ref()
398 }
399
400 fn logical_null_count(&self) -> usize {
401 self.null_count()
403 }
404
405 fn get_buffer_memory_size(&self) -> usize {
406 let mut size = self.entries.get_buffer_memory_size();
407 size += self.value_offsets.inner().inner().capacity();
408 if let Some(n) = self.nulls.as_ref() {
409 size += n.buffer().capacity();
410 }
411 size
412 }
413
414 fn get_array_memory_size(&self) -> usize {
415 let mut size = std::mem::size_of::<Self>() + self.entries.get_array_memory_size();
416 size += self.value_offsets.inner().inner().capacity();
417 if let Some(n) = self.nulls.as_ref() {
418 size += n.buffer().capacity();
419 }
420 size
421 }
422}
423
424impl ArrayAccessor for &MapArray {
425 type Item = StructArray;
426
427 fn value(&self, index: usize) -> Self::Item {
428 MapArray::value(self, index)
429 }
430
431 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
432 MapArray::value(self, index)
433 }
434}
435
436impl std::fmt::Debug for MapArray {
437 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
438 write!(f, "MapArray\n[\n")?;
439 print_long_array(self, f, |array, index, f| {
440 std::fmt::Debug::fmt(&array.value(index), f)
441 })?;
442 write!(f, "]")
443 }
444}
445
446impl From<MapArray> for ListArray {
447 fn from(value: MapArray) -> Self {
448 let field = match value.data_type() {
449 DataType::Map(field, _) => field,
450 _ => unreachable!("This should be a map type."),
451 };
452 let data_type = DataType::List(field.clone());
453 let builder = value.into_data().into_builder().data_type(data_type);
454 let array_data = unsafe { builder.build_unchecked() };
455
456 ListArray::from(array_data)
457 }
458}
459
#[cfg(test)]
mod tests {
    use crate::cast::AsArray;
    use crate::types::UInt32Type;
    use crate::{Int32Array, UInt32Array};
    use arrow_schema::Fields;

    use super::*;

    /// Returns the unordered map type whose non-nullable `"entries"` field has
    /// the data type of `entries`.
    fn unordered_map_type(entries: &StructArray) -> DataType {
        let entries_field = Field::new("entries", entries.data_type().clone(), false);
        DataType::Map(Arc::new(entries_field), false)
    }

    /// Builds a three-slot map `{0..3}, {3..6}, {6..8}` with `Int32` keys
    /// `0..8` and `UInt32` values `0, 10, .., 70`, without any nulls.
    fn create_from_buffers() -> MapArray {
        let key_column = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let value_column = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(Buffer::from(
                [0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
            ))
            .build()
            .unwrap();

        let offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

        let key_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let value_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let entries = StructArray::from(vec![
            (key_field, make_array(key_column)),
            (value_field, make_array(value_column)),
        ]);

        let data = ArrayData::builder(unordered_map_type(&entries))
            .len(3)
            .add_buffer(offsets)
            .add_child_data(entries.into_data())
            .build()
            .unwrap();
        MapArray::from(data)
    }

    /// Checks accessors on a map with nullable values, before and after slicing.
    #[test]
    fn test_map_array() {
        let key_column = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let value_data = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(Buffer::from(
                [0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
            ))
            .null_bit_buffer(Some(Buffer::from(&[0b11010110])))
            .build()
            .unwrap();

        let offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
        let entries = StructArray::from(vec![
            (keys_field.clone(), make_array(key_column)),
            (values_field.clone(), make_array(value_data.clone())),
        ]);

        let data = ArrayData::builder(unordered_map_type(&entries))
            .len(3)
            .add_buffer(offsets)
            .add_child_data(entries.into_data())
            .build()
            .unwrap();
        let map_array = MapArray::from(data);

        assert_eq!(value_data, map_array.values().to_data());
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        // Slot 0 covers entries 0..3.
        let expected_keys = Arc::new(Int32Array::from(vec![0, 1, 2])) as ArrayRef;
        let expected_values =
            Arc::new(UInt32Array::from(vec![None, Some(10u32), Some(20)])) as ArrayRef;
        let expected_first = StructArray::from(vec![
            (keys_field.clone(), expected_keys),
            (values_field.clone(), expected_values),
        ]);
        assert_eq!(
            expected_first,
            StructArray::from(map_array.value(0).into_data())
        );
        assert_eq!(
            &expected_first,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        for slot in 0..3 {
            assert!(map_array.is_valid(slot));
            assert!(!map_array.is_null(slot));
        }

        // Drop the first slot and repeat the checks on the remainder.
        let sliced = map_array.slice(1, 2);

        assert_eq!(value_data, sliced.values().to_data());
        assert_eq!(&DataType::UInt32, sliced.value_type());
        assert_eq!(2, sliced.len());
        assert_eq!(0, sliced.null_count());
        assert_eq!(6, sliced.value_offsets()[1]);
        assert_eq!(2, sliced.value_length(1));

        // The first remaining slot covers entries 3..6.
        let expected_keys = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef;
        let expected_values =
            Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
        let expected_first = StructArray::from(vec![
            (keys_field, expected_keys),
            (values_field, expected_values),
        ]);
        assert_eq!(
            &expected_first,
            sliced
                .value(0)
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        assert_eq!(
            &expected_first,
            unsafe { sliced.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
    }

    /// Compares a sliced map against one built directly from the trailing entries.
    #[test]
    #[ignore = "Test fails because slice of <list<struct>> is still buggy"]
    fn test_map_array_slice() {
        let map_array = create_from_buffers();

        let sliced_array = map_array.slice(1, 2);
        assert_eq!(2, sliced_array.len());
        assert_eq!(1, sliced_array.offset());
        let sliced_data = sliced_array.to_data();
        for child in sliced_data.child_data() {
            assert_eq!(child.offset(), 1);
        }

        let sliced_map_array = sliced_array.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(3, sliced_map_array.value_offsets()[0]);
        assert_eq!(3, sliced_map_array.value_length(0));
        assert_eq!(6, sliced_map_array.value_offsets()[1]);
        assert_eq!(2, sliced_map_array.value_length(1));

        // Expected array: only the last five entries, with rebased offsets.
        let key_column = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(Buffer::from([3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let value_column = ArrayData::builder(DataType::UInt32)
            .len(5)
            .add_buffer(Buffer::from([30u32, 40, 50, 60, 70].to_byte_slice()))
            .build()
            .unwrap();

        let offsets = Buffer::from([0, 3, 5].to_byte_slice());

        let key_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let value_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let entries = StructArray::from(vec![
            (key_field, make_array(key_column)),
            (value_field, make_array(value_column)),
        ]);

        let expected_data = ArrayData::builder(unordered_map_type(&entries))
            .len(2)
            .add_buffer(offsets)
            .add_child_data(entries.into_data())
            .build()
            .unwrap();
        let expected_map_array = MapArray::from(expected_data);

        assert_eq!(&expected_map_array, sliced_map_array);
    }

    /// Asking for the slot one past the end must panic.
    #[test]
    #[should_panic(expected = "index out of bounds: the len is ")]
    fn test_map_array_index_out_of_bound() {
        let map_array = create_from_buffers();
        let past_the_end = map_array.len();

        map_array.value(past_the_end);
    }

    /// Converting array data of a non-map type must panic with a clear message.
    #[test]
    #[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")]
    fn test_from_array_data_validation() {
        let entry_fields = Fields::from(vec![
            Field::new("keys", DataType::Int32, true),
            Field::new("values", DataType::UInt32, true),
        ]);
        let struct_t = DataType::Struct(entry_fields);
        let dict_t = DataType::Dictionary(Box::new(DataType::Int32), Box::new(struct_t));

        let empty = ArrayData::new_empty(&dict_t);
        let _ = MapArray::from(empty);
    }

    /// Builds a map through `new_from_strings` and inspects the result.
    #[test]
    fn test_new_from_strings() {
        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

        let entry_offsets = [0, 3, 6, 8];

        let built =
            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets);
        let map_array = built.unwrap();

        assert_eq!(
            &values_data,
            map_array.values().as_primitive::<UInt32Type>()
        );
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        // Slot 0 covers the first three key/value pairs.
        let expected_keys = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
        let expected_values = Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as ArrayRef;
        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let expected_first = StructArray::from(vec![
            (keys_field, expected_keys),
            (values_field, expected_values),
        ]);
        assert_eq!(
            expected_first,
            StructArray::from(map_array.value(0).into_data())
        );
        assert_eq!(
            &expected_first,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        for slot in 0..3 {
            assert!(map_array.is_valid(slot));
            assert!(!map_array.is_null(slot));
        }
    }

    /// Exercises the accepted inputs and each rejection path of `try_new`
    /// that this test reaches.
    #[test]
    fn test_try_new() {
        // Every column in this test holds the same five integers.
        let int_column = || Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as ArrayRef;

        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
        let fields = Fields::from(vec![
            Field::new("key", DataType::Int32, false),
            Field::new("values", DataType::Int32, false),
        ]);
        let columns = vec![int_column(), int_column()];

        let entries = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));

        // Accepted: without nulls, and with a null buffer of matching length.
        MapArray::new(field.clone(), offsets.clone(), entries.clone(), None, false);

        let nulls = NullBuffer::new_null(3);
        MapArray::new(field.clone(), offsets, entries.clone(), Some(nulls), false);

        // Rejected: four slots but only three validity entries.
        let nulls = NullBuffer::new_null(3);
        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
        let err = MapArray::try_new(
            field.clone(),
            offsets.clone(),
            entries.clone(),
            Some(nulls),
            false,
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
        );

        // Rejected: the offsets reach past the end of the (sliced) entries.
        let short_entries = entries.slice(0, 2);
        let err = MapArray::try_new(field, offsets.clone(), short_entries, None, false)
            .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Max offset of 5 exceeds length of entries 2"
        );

        // Rejected: the field's data type differs from the entries' type.
        let field = Arc::new(Field::new("element", DataType::Int64, false));
        let err = MapArray::try_new(field, offsets.clone(), entries, None, false)
            .unwrap_err()
            .to_string();

        assert!(
            err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"),
            "{err}"
        );

        // Rejected: the entries struct has three children instead of two.
        let fields = Fields::from(vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Int32, false),
            Field::new("c", DataType::Int32, false),
        ]);
        let columns = vec![int_column(), int_column(), int_column()];

        let three_columns = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
        let err = MapArray::try_new(field, offsets, three_columns, None, false).unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: MapArray entries must contain two children, got 3"
        );
    }
}