geoarrow_array/array/
multipolygon.rs

1use std::sync::Arc;
2
3use arrow_array::cast::AsArray;
4use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
5use arrow_buffer::{NullBuffer, OffsetBuffer};
6use arrow_schema::{DataType, Field};
7use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
8use geoarrow_schema::type_id::GeometryTypeId;
9use geoarrow_schema::{CoordType, Dimension, GeoArrowType, Metadata, MultiPolygonType};
10
11use crate::array::{CoordBuffer, GenericWkbArray, PolygonArray};
12use crate::builder::MultiPolygonBuilder;
13use crate::capacity::MultiPolygonCapacity;
14use crate::eq::offset_buffer_eq;
15use crate::scalar::MultiPolygon;
16use crate::trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow};
17use crate::util::{OffsetBufferUtils, offsets_buffer_i64_to_i32};
18
/// An immutable array of MultiPolygon geometries.
///
/// This is semantically equivalent to `Vec<Option<MultiPolygon>>` due to the internal validity
/// bitmap.
///
/// The geometries are stored as three levels of offsets on top of one shared coordinate buffer:
/// geometry offsets index into polygons, polygon offsets index into rings, and ring offsets index
/// into coordinates.
#[derive(Debug, Clone)]
pub struct MultiPolygonArray {
    /// The GeoArrow type of this array, built from the coordinate buffer's dimension and
    /// coordinate type plus the user-supplied metadata.
    pub(crate) data_type: MultiPolygonType,

    /// The coordinates of every ring in the array
    pub(crate) coords: CoordBuffer,

    /// Offsets into the polygon array where each geometry starts
    pub(crate) geom_offsets: OffsetBuffer<i32>,

    /// Offsets into the ring array where each polygon starts
    pub(crate) polygon_offsets: OffsetBuffer<i32>,

    /// Offsets into the coordinate array where each ring starts
    pub(crate) ring_offsets: OffsetBuffer<i32>,

    /// Validity bitmap; `None` means no geometry is null
    pub(crate) nulls: Option<NullBuffer>,
}
41
42pub(super) fn check(
43    coords: &CoordBuffer,
44    geom_offsets: &OffsetBuffer<i32>,
45    polygon_offsets: &OffsetBuffer<i32>,
46    ring_offsets: &OffsetBuffer<i32>,
47    validity_len: Option<usize>,
48) -> GeoArrowResult<()> {
49    if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) {
50        return Err(GeoArrowError::InvalidGeoArrow(
51            "nulls mask length must match the number of values".to_string(),
52        ));
53    }
54
55    // Offset can be smaller than coords length if sliced
56    if *ring_offsets.last() as usize != coords.len() {
57        return Err(GeoArrowError::InvalidGeoArrow(
58            "largest ring offset must match coords length".to_string(),
59        ));
60    }
61
62    if *polygon_offsets.last() as usize != ring_offsets.len_proxy() {
63        return Err(GeoArrowError::InvalidGeoArrow(
64            "largest polygon offset must match ring offsets length".to_string(),
65        ));
66    }
67
68    if *geom_offsets.last() as usize > polygon_offsets.len_proxy() {
69        return Err(GeoArrowError::InvalidGeoArrow(
70            "largest geometry offset must not be longer than polygon offsets length".to_string(),
71        ));
72    }
73
74    Ok(())
75}
76
77impl MultiPolygonArray {
78    /// Create a new MultiPolygonArray from parts
79    ///
80    /// # Implementation
81    ///
82    /// This function is `O(1)`.
83    ///
84    /// # Panics
85    ///
86    /// - if the nulls is not `None` and its length is different from the number of geometries
87    /// - if the largest ring offset does not match the number of coordinates
88    /// - if the largest polygon offset does not match the size of ring offsets
89    /// - if the largest geometry offset does not match the size of polygon offsets
90    pub fn new(
91        coords: CoordBuffer,
92        geom_offsets: OffsetBuffer<i32>,
93        polygon_offsets: OffsetBuffer<i32>,
94        ring_offsets: OffsetBuffer<i32>,
95        nulls: Option<NullBuffer>,
96        metadata: Arc<Metadata>,
97    ) -> Self {
98        Self::try_new(
99            coords,
100            geom_offsets,
101            polygon_offsets,
102            ring_offsets,
103            nulls,
104            metadata,
105        )
106        .unwrap()
107    }
108
109    /// Create a new MultiPolygonArray from parts
110    ///
111    /// # Implementation
112    ///
113    /// This function is `O(1)`.
114    ///
115    /// # Errors
116    ///
117    /// - if the nulls is not `None` and its length is different from the number of geometries
118    /// - if the largest ring offset does not match the number of coordinates
119    /// - if the largest polygon offset does not match the size of ring offsets
120    /// - if the largest geometry offset does not match the size of polygon offsets
121    pub fn try_new(
122        coords: CoordBuffer,
123        geom_offsets: OffsetBuffer<i32>,
124        polygon_offsets: OffsetBuffer<i32>,
125        ring_offsets: OffsetBuffer<i32>,
126        nulls: Option<NullBuffer>,
127        metadata: Arc<Metadata>,
128    ) -> GeoArrowResult<Self> {
129        check(
130            &coords,
131            &geom_offsets,
132            &polygon_offsets,
133            &ring_offsets,
134            nulls.as_ref().map(|v| v.len()),
135        )?;
136        Ok(Self {
137            data_type: MultiPolygonType::new(coords.dim(), metadata)
138                .with_coord_type(coords.coord_type()),
139            coords,
140            geom_offsets,
141            polygon_offsets,
142            ring_offsets,
143            nulls,
144        })
145    }
146
147    fn vertices_field(&self) -> Arc<Field> {
148        Field::new("vertices", self.coords.storage_type(), false).into()
149    }
150
151    fn rings_field(&self) -> Arc<Field> {
152        let name = "rings";
153        Field::new_list(name, self.vertices_field(), false).into()
154    }
155
156    fn polygons_field(&self) -> Arc<Field> {
157        let name = "polygons";
158        Field::new_list(name, self.rings_field(), false).into()
159    }
160
161    /// Access the underlying coordinate buffer
162    pub fn coords(&self) -> &CoordBuffer {
163        &self.coords
164    }
165
166    /// Access the underlying geometry offsets buffer
167    pub fn geom_offsets(&self) -> &OffsetBuffer<i32> {
168        &self.geom_offsets
169    }
170
171    /// Access the underlying polygon offsets buffer
172    pub fn polygon_offsets(&self) -> &OffsetBuffer<i32> {
173        &self.polygon_offsets
174    }
175
176    /// Access the underlying ring offsets buffer
177    pub fn ring_offsets(&self) -> &OffsetBuffer<i32> {
178        &self.ring_offsets
179    }
180
181    /// The lengths of each buffer contained in this array.
182    pub fn buffer_lengths(&self) -> MultiPolygonCapacity {
183        MultiPolygonCapacity::new(
184            *self.ring_offsets.last() as usize,
185            *self.polygon_offsets.last() as usize,
186            *self.geom_offsets.last() as usize,
187            self.len(),
188        )
189    }
190
191    /// The number of bytes occupied by this array.
192    pub fn num_bytes(&self) -> usize {
193        let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0);
194        validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension())
195    }
196
197    /// Slice this [`MultiPolygonArray`].
198    ///
199    /// # Panic
200    /// This function panics iff `offset + length > self.len()`.
201    #[inline]
202    pub fn slice(&self, offset: usize, length: usize) -> Self {
203        assert!(
204            offset + length <= self.len(),
205            "offset + length may not exceed length of array"
206        );
207        // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets
208        // would be in the wrong location.
209        Self {
210            data_type: self.data_type.clone(),
211            coords: self.coords.clone(),
212            geom_offsets: self.geom_offsets.slice(offset, length),
213            polygon_offsets: self.polygon_offsets.clone(),
214            ring_offsets: self.ring_offsets.clone(),
215            nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)),
216        }
217    }
218
219    /// Change the [`CoordType`] of this array.
220    pub fn into_coord_type(self, coord_type: CoordType) -> Self {
221        Self {
222            data_type: self.data_type.with_coord_type(coord_type),
223            coords: self.coords.into_coord_type(coord_type),
224            ..self
225        }
226    }
227
228    /// Change the [`Metadata`] of this array.
229    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
230        Self {
231            data_type: self.data_type.with_metadata(metadata),
232            ..self
233        }
234    }
235}
236
237impl GeoArrowArray for MultiPolygonArray {
238    fn as_any(&self) -> &dyn std::any::Any {
239        self
240    }
241
242    fn into_array_ref(self) -> ArrayRef {
243        Arc::new(self.into_arrow())
244    }
245
246    fn to_array_ref(&self) -> ArrayRef {
247        self.clone().into_array_ref()
248    }
249
250    #[inline]
251    fn len(&self) -> usize {
252        self.geom_offsets.len_proxy()
253    }
254
255    #[inline]
256    fn logical_nulls(&self) -> Option<NullBuffer> {
257        self.nulls.clone()
258    }
259
260    #[inline]
261    fn logical_null_count(&self) -> usize {
262        self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0)
263    }
264
265    #[inline]
266    fn is_null(&self, i: usize) -> bool {
267        self.nulls
268            .as_ref()
269            .map(|n| n.is_null(i))
270            .unwrap_or_default()
271    }
272
273    fn data_type(&self) -> GeoArrowType {
274        GeoArrowType::MultiPolygon(self.data_type.clone())
275    }
276
277    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray> {
278        Arc::new(self.slice(offset, length))
279    }
280
281    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray> {
282        Arc::new(self.with_metadata(metadata))
283    }
284}
285
286impl<'a> GeoArrowArrayAccessor<'a> for MultiPolygonArray {
287    type Item = MultiPolygon<'a>;
288
289    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
290        Ok(MultiPolygon::new(
291            &self.coords,
292            &self.geom_offsets,
293            &self.polygon_offsets,
294            &self.ring_offsets,
295            index,
296        ))
297    }
298}
299
300impl IntoArrow for MultiPolygonArray {
301    type ArrowArray = GenericListArray<i32>;
302    type ExtensionType = MultiPolygonType;
303
304    fn into_arrow(self) -> Self::ArrowArray {
305        let vertices_field = self.vertices_field();
306        let rings_field = self.rings_field();
307        let polygons_field = self.polygons_field();
308
309        let nulls = self.nulls;
310        let coord_array = ArrayRef::from(self.coords);
311        let ring_array = Arc::new(GenericListArray::new(
312            vertices_field,
313            self.ring_offsets,
314            coord_array,
315            None,
316        ));
317        let polygons_array = Arc::new(GenericListArray::new(
318            rings_field,
319            self.polygon_offsets,
320            ring_array,
321            None,
322        ));
323        GenericListArray::new(polygons_field, self.geom_offsets, polygons_array, nulls)
324    }
325
326    fn extension_type(&self) -> &Self::ExtensionType {
327        &self.data_type
328    }
329}
330
331impl TryFrom<(&GenericListArray<i32>, MultiPolygonType)> for MultiPolygonArray {
332    type Error = GeoArrowError;
333
334    fn try_from(
335        (geom_array, typ): (&GenericListArray<i32>, MultiPolygonType),
336    ) -> GeoArrowResult<Self> {
337        let geom_offsets = geom_array.offsets();
338        let nulls = geom_array.nulls();
339
340        let polygons_dyn_array = geom_array.values();
341        let polygons_array = polygons_dyn_array.as_list::<i32>();
342
343        let polygon_offsets = polygons_array.offsets();
344        let rings_dyn_array = polygons_array.values();
345        let rings_array = rings_dyn_array.as_list::<i32>();
346
347        let ring_offsets = rings_array.offsets();
348        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
349
350        Ok(Self::new(
351            coords,
352            geom_offsets.clone(),
353            polygon_offsets.clone(),
354            ring_offsets.clone(),
355            nulls.cloned(),
356            typ.metadata().clone(),
357        ))
358    }
359}
360
361impl TryFrom<(&GenericListArray<i64>, MultiPolygonType)> for MultiPolygonArray {
362    type Error = GeoArrowError;
363
364    fn try_from(
365        (geom_array, typ): (&GenericListArray<i64>, MultiPolygonType),
366    ) -> GeoArrowResult<Self> {
367        let geom_offsets = offsets_buffer_i64_to_i32(geom_array.offsets())?;
368        let nulls = geom_array.nulls();
369
370        let polygons_dyn_array = geom_array.values();
371        let polygons_array = polygons_dyn_array.as_list::<i64>();
372
373        let polygon_offsets = offsets_buffer_i64_to_i32(polygons_array.offsets())?;
374        let rings_dyn_array = polygons_array.values();
375        let rings_array = rings_dyn_array.as_list::<i64>();
376
377        let ring_offsets = offsets_buffer_i64_to_i32(rings_array.offsets())?;
378        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
379
380        Ok(Self::new(
381            coords,
382            geom_offsets,
383            polygon_offsets,
384            ring_offsets,
385            nulls.cloned(),
386            typ.metadata().clone(),
387        ))
388    }
389}
390
391impl TryFrom<(&dyn Array, MultiPolygonType)> for MultiPolygonArray {
392    type Error = GeoArrowError;
393
394    fn try_from((value, typ): (&dyn Array, MultiPolygonType)) -> GeoArrowResult<Self> {
395        match value.data_type() {
396            DataType::List(_) => (value.as_list::<i32>(), typ).try_into(),
397            DataType::LargeList(_) => (value.as_list::<i64>(), typ).try_into(),
398            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
399                "Unexpected MultiPolygon DataType: {dt:?}",
400            ))),
401        }
402    }
403}
404
405impl TryFrom<(&dyn Array, &Field)> for MultiPolygonArray {
406    type Error = GeoArrowError;
407
408    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
409        let typ = field.try_extension_type::<MultiPolygonType>()?;
410        (arr, typ).try_into()
411    }
412}
413
414impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, MultiPolygonType)> for MultiPolygonArray {
415    type Error = GeoArrowError;
416
417    fn try_from(value: (GenericWkbArray<O>, MultiPolygonType)) -> GeoArrowResult<Self> {
418        let mut_arr: MultiPolygonBuilder = value.try_into()?;
419        Ok(mut_arr.finish())
420    }
421}
422
423impl From<PolygonArray> for MultiPolygonArray {
424    fn from(value: PolygonArray) -> Self {
425        let (coord_type, dimension, metadata) = value.data_type.into_inner();
426        let new_type = MultiPolygonType::new(dimension, metadata).with_coord_type(coord_type);
427
428        let coords = value.coords;
429        let geom_offsets = OffsetBuffer::from_lengths(vec![1; coords.len()]);
430        let ring_offsets = value.ring_offsets;
431        let polygon_offsets = value.geom_offsets;
432        let nulls = value.nulls;
433        Self {
434            data_type: new_type,
435            coords,
436            geom_offsets,
437            polygon_offsets,
438            ring_offsets,
439            nulls,
440        }
441    }
442}
443
444impl PartialEq for MultiPolygonArray {
445    fn eq(&self, other: &Self) -> bool {
446        self.nulls == other.nulls
447            && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets)
448            && offset_buffer_eq(&self.polygon_offsets, &other.polygon_offsets)
449            && offset_buffer_eq(&self.ring_offsets, &other.ring_offsets)
450            && self.coords == other.coords
451    }
452}
453
impl GeometryTypeId for MultiPolygonArray {
    // NOTE(review): presumably the type code assigned to MultiPolygon within the geometry type
    // id scheme — confirm against the `GeometryTypeId` trait documentation.
    const GEOMETRY_TYPE_OFFSET: i8 = 6;

    /// The coordinate dimension recorded in this array's data type.
    fn dimension(&self) -> Dimension {
        self.data_type.dimension()
    }
}
461
#[cfg(test)]
mod test {
    use geo_traits::to_geo::ToGeoMultiPolygon;
    use geoarrow_schema::{CoordType, Dimension};

    use super::*;
    use crate::test::multipolygon;

    /// Building from `geo` multipolygons and reading them back yields the same values, both for
    /// the full array and for a slice of it.
    #[test]
    fn geo_round_trip() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            let expected = [
                Some(multipolygon::mp0()),
                None,
                Some(multipolygon::mp1()),
                None,
            ];
            let typ = MultiPolygonType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let array = MultiPolygonBuilder::from_nullable_multi_polygons(&expected, typ).finish();

            for (idx, scalar) in array.iter().enumerate() {
                let actual = scalar.transpose().unwrap().map(|mp| mp.to_multi_polygon());
                assert_eq!(expected[idx], actual);
            }

            // Test sliced
            let tail = array.slice(2, 2);
            for (idx, scalar) in tail.iter().enumerate() {
                let actual = scalar.transpose().unwrap().map(|mp| mp.to_multi_polygon());
                assert_eq!(expected[idx + 2], actual);
            }
        }
    }

    /// Converting a fixture array to `geo` values and rebuilding it reproduces the fixture.
    #[test]
    fn geo_round_trip2() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            let original = multipolygon::array(coord_type, Dimension::XY);

            let mut as_geo = Vec::new();
            for scalar in original.iter() {
                as_geo.push(scalar.transpose().unwrap().map(|mp| mp.to_multi_polygon()));
            }

            let typ = MultiPolygonType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let rebuilt = MultiPolygonBuilder::from_nullable_multi_polygons(&as_geo, typ).finish();
            assert_eq!(original, rebuilt);
        }
    }

    /// Round trip through Arrow, both with an explicit extension type and with a field.
    #[test]
    fn try_from_arrow() {
        let dims = [
            Dimension::XY,
            Dimension::XYZ,
            Dimension::XYM,
            Dimension::XYZM,
        ];
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            for dim in dims {
                let original = multipolygon::array(coord_type, dim);

                let extension_type = original.extension_type().clone();
                let field = extension_type.to_field("geometry", true);

                let arrow_arr = original.to_array_ref();

                let from_type: MultiPolygonArray =
                    (arrow_arr.as_ref(), extension_type).try_into().unwrap();
                let from_field: MultiPolygonArray =
                    (arrow_arr.as_ref(), &field).try_into().unwrap();

                assert_eq!(original, from_type);
                assert_eq!(original, from_field);
            }
        }
    }

    /// Equality ignores the coordinate layout but not the contents.
    #[test]
    fn partial_eq() {
        let dims = [
            Dimension::XY,
            Dimension::XYZ,
            Dimension::XYM,
            Dimension::XYZM,
        ];
        for dim in dims {
            let interleaved = multipolygon::array(CoordType::Interleaved, dim);
            let separated = multipolygon::array(CoordType::Separated, dim);
            assert_eq!(interleaved, interleaved);
            assert_eq!(separated, separated);
            assert_eq!(interleaved, separated);

            assert_ne!(interleaved, separated.slice(0, 2));
        }
    }

    /// An array sliced through this crate's `slice` still validates after an Arrow round trip.
    #[test]
    fn test_validation_with_sliced_array() {
        let full = multipolygon::array(CoordType::Interleaved, Dimension::XY);
        let first_only = full.slice(0, 1);

        let arrow_arr = first_only.to_array_ref();
        let typ = full.extension_type().clone();
        let restored = MultiPolygonArray::try_from((arrow_arr.as_ref(), typ)).unwrap();
        assert_eq!(restored.len(), 1);
    }

    /// An array sliced by arrow-rs itself still validates when converted back.
    #[test]
    fn test_validation_with_array_sliced_by_arrow_rs() {
        let full = multipolygon::array(CoordType::Interleaved, Dimension::XY);
        let arrow_slice = full.to_array_ref().slice(0, 1);

        let typ = full.extension_type().clone();
        let restored = MultiPolygonArray::try_from((arrow_slice.as_ref(), typ)).unwrap();
        assert_eq!(restored.len(), 1);
    }
}