geoarrow_array/array/
multipolygon.rs

1use std::sync::Arc;
2
3use arrow_array::cast::AsArray;
4use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
5use arrow_buffer::{NullBuffer, OffsetBuffer};
6use arrow_schema::{DataType, Field};
7use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
8use geoarrow_schema::type_id::GeometryTypeId;
9use geoarrow_schema::{CoordType, Dimension, GeoArrowType, Metadata, MultiPolygonType};
10
11use crate::array::{CoordBuffer, GenericWkbArray, PolygonArray};
12use crate::builder::MultiPolygonBuilder;
13use crate::capacity::MultiPolygonCapacity;
14use crate::eq::offset_buffer_eq;
15use crate::scalar::MultiPolygon;
16use crate::trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow};
17use crate::util::{OffsetBufferUtils, offsets_buffer_i64_to_i32};
18
/// An immutable array of MultiPolygon geometries.
///
/// This is semantically equivalent to `Vec<Option<MultiPolygon>>` due to the internal validity
/// bitmap.
///
/// The geometries are stored as three levels of offsets on top of a flat coordinate buffer:
/// geometry offsets index into polygon offsets, polygon offsets index into ring offsets, and ring
/// offsets index into the coordinates.
#[derive(Debug, Clone)]
pub struct MultiPolygonArray {
    /// The GeoArrow type of this array (dimension, coordinate type and metadata)
    pub(crate) data_type: MultiPolygonType,

    /// The coordinates of every ring of every polygon, stored back to back
    pub(crate) coords: CoordBuffer,

    /// Offsets into the polygon array where each geometry starts
    pub(crate) geom_offsets: OffsetBuffer<i32>,

    /// Offsets into the ring array where each polygon starts
    pub(crate) polygon_offsets: OffsetBuffer<i32>,

    /// Offsets into the coordinate array where each ring starts
    pub(crate) ring_offsets: OffsetBuffer<i32>,

    /// Validity bitmap; `None` means that no geometry is null
    pub(crate) nulls: Option<NullBuffer>,
}
41
42pub(super) fn check(
43    coords: &CoordBuffer,
44    geom_offsets: &OffsetBuffer<i32>,
45    polygon_offsets: &OffsetBuffer<i32>,
46    ring_offsets: &OffsetBuffer<i32>,
47    validity_len: Option<usize>,
48) -> GeoArrowResult<()> {
49    if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) {
50        return Err(GeoArrowError::InvalidGeoArrow(
51            "nulls mask length must match the number of values".to_string(),
52        ));
53    }
54    if *ring_offsets.last() as usize != coords.len() {
55        return Err(GeoArrowError::InvalidGeoArrow(
56            "largest ring offset must match coords length".to_string(),
57        ));
58    }
59
60    if *polygon_offsets.last() as usize != ring_offsets.len_proxy() {
61        return Err(GeoArrowError::InvalidGeoArrow(
62            "largest polygon offset must match ring offsets length".to_string(),
63        ));
64    }
65
66    if *geom_offsets.last() as usize != polygon_offsets.len_proxy() {
67        return Err(GeoArrowError::InvalidGeoArrow(
68            "largest geometry offset must match polygon offsets length".to_string(),
69        ));
70    }
71
72    Ok(())
73}
74
75impl MultiPolygonArray {
76    /// Create a new MultiPolygonArray from parts
77    ///
78    /// # Implementation
79    ///
80    /// This function is `O(1)`.
81    ///
82    /// # Panics
83    ///
84    /// - if the nulls is not `None` and its length is different from the number of geometries
85    /// - if the largest ring offset does not match the number of coordinates
86    /// - if the largest polygon offset does not match the size of ring offsets
87    /// - if the largest geometry offset does not match the size of polygon offsets
88    pub fn new(
89        coords: CoordBuffer,
90        geom_offsets: OffsetBuffer<i32>,
91        polygon_offsets: OffsetBuffer<i32>,
92        ring_offsets: OffsetBuffer<i32>,
93        nulls: Option<NullBuffer>,
94        metadata: Arc<Metadata>,
95    ) -> Self {
96        Self::try_new(
97            coords,
98            geom_offsets,
99            polygon_offsets,
100            ring_offsets,
101            nulls,
102            metadata,
103        )
104        .unwrap()
105    }
106
107    /// Create a new MultiPolygonArray from parts
108    ///
109    /// # Implementation
110    ///
111    /// This function is `O(1)`.
112    ///
113    /// # Errors
114    ///
115    /// - if the nulls is not `None` and its length is different from the number of geometries
116    /// - if the largest ring offset does not match the number of coordinates
117    /// - if the largest polygon offset does not match the size of ring offsets
118    /// - if the largest geometry offset does not match the size of polygon offsets
119    pub fn try_new(
120        coords: CoordBuffer,
121        geom_offsets: OffsetBuffer<i32>,
122        polygon_offsets: OffsetBuffer<i32>,
123        ring_offsets: OffsetBuffer<i32>,
124        nulls: Option<NullBuffer>,
125        metadata: Arc<Metadata>,
126    ) -> GeoArrowResult<Self> {
127        check(
128            &coords,
129            &geom_offsets,
130            &polygon_offsets,
131            &ring_offsets,
132            nulls.as_ref().map(|v| v.len()),
133        )?;
134        Ok(Self {
135            data_type: MultiPolygonType::new(coords.dim(), metadata)
136                .with_coord_type(coords.coord_type()),
137            coords,
138            geom_offsets,
139            polygon_offsets,
140            ring_offsets,
141            nulls,
142        })
143    }
144
145    fn vertices_field(&self) -> Arc<Field> {
146        Field::new("vertices", self.coords.storage_type(), false).into()
147    }
148
149    fn rings_field(&self) -> Arc<Field> {
150        let name = "rings";
151        Field::new_list(name, self.vertices_field(), false).into()
152    }
153
154    fn polygons_field(&self) -> Arc<Field> {
155        let name = "polygons";
156        Field::new_list(name, self.rings_field(), false).into()
157    }
158
159    /// Access the underlying coordinate buffer
160    pub fn coords(&self) -> &CoordBuffer {
161        &self.coords
162    }
163
164    /// Access the underlying geometry offsets buffer
165    pub fn geom_offsets(&self) -> &OffsetBuffer<i32> {
166        &self.geom_offsets
167    }
168
169    /// Access the underlying polygon offsets buffer
170    pub fn polygon_offsets(&self) -> &OffsetBuffer<i32> {
171        &self.polygon_offsets
172    }
173
174    /// Access the underlying ring offsets buffer
175    pub fn ring_offsets(&self) -> &OffsetBuffer<i32> {
176        &self.ring_offsets
177    }
178
179    /// The lengths of each buffer contained in this array.
180    pub fn buffer_lengths(&self) -> MultiPolygonCapacity {
181        MultiPolygonCapacity::new(
182            *self.ring_offsets.last() as usize,
183            *self.polygon_offsets.last() as usize,
184            *self.geom_offsets.last() as usize,
185            self.len(),
186        )
187    }
188
189    /// The number of bytes occupied by this array.
190    pub fn num_bytes(&self) -> usize {
191        let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0);
192        validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension())
193    }
194
195    /// Slice this [`MultiPolygonArray`].
196    ///
197    /// # Panic
198    /// This function panics iff `offset + length > self.len()`.
199    #[inline]
200    pub fn slice(&self, offset: usize, length: usize) -> Self {
201        assert!(
202            offset + length <= self.len(),
203            "offset + length may not exceed length of array"
204        );
205        // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets
206        // would be in the wrong location.
207        Self {
208            data_type: self.data_type.clone(),
209            coords: self.coords.clone(),
210            geom_offsets: self.geom_offsets.slice(offset, length),
211            polygon_offsets: self.polygon_offsets.clone(),
212            ring_offsets: self.ring_offsets.clone(),
213            nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)),
214        }
215    }
216
217    /// Change the [`CoordType`] of this array.
218    pub fn into_coord_type(self, coord_type: CoordType) -> Self {
219        Self {
220            data_type: self.data_type.with_coord_type(coord_type),
221            coords: self.coords.into_coord_type(coord_type),
222            ..self
223        }
224    }
225
226    /// Change the [`Metadata`] of this array.
227    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
228        Self {
229            data_type: self.data_type.with_metadata(metadata),
230            ..self
231        }
232    }
233}
234
235impl GeoArrowArray for MultiPolygonArray {
236    fn as_any(&self) -> &dyn std::any::Any {
237        self
238    }
239
240    fn into_array_ref(self) -> ArrayRef {
241        Arc::new(self.into_arrow())
242    }
243
244    fn to_array_ref(&self) -> ArrayRef {
245        self.clone().into_array_ref()
246    }
247
248    #[inline]
249    fn len(&self) -> usize {
250        self.geom_offsets.len_proxy()
251    }
252
253    #[inline]
254    fn logical_nulls(&self) -> Option<NullBuffer> {
255        self.nulls.clone()
256    }
257
258    #[inline]
259    fn logical_null_count(&self) -> usize {
260        self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0)
261    }
262
263    #[inline]
264    fn is_null(&self, i: usize) -> bool {
265        self.nulls
266            .as_ref()
267            .map(|n| n.is_null(i))
268            .unwrap_or_default()
269    }
270
271    fn data_type(&self) -> GeoArrowType {
272        GeoArrowType::MultiPolygon(self.data_type.clone())
273    }
274
275    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray> {
276        Arc::new(self.slice(offset, length))
277    }
278
279    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray> {
280        Arc::new(self.with_metadata(metadata))
281    }
282}
283
284impl<'a> GeoArrowArrayAccessor<'a> for MultiPolygonArray {
285    type Item = MultiPolygon<'a>;
286
287    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
288        Ok(MultiPolygon::new(
289            &self.coords,
290            &self.geom_offsets,
291            &self.polygon_offsets,
292            &self.ring_offsets,
293            index,
294        ))
295    }
296}
297
298impl IntoArrow for MultiPolygonArray {
299    type ArrowArray = GenericListArray<i32>;
300    type ExtensionType = MultiPolygonType;
301
302    fn into_arrow(self) -> Self::ArrowArray {
303        let vertices_field = self.vertices_field();
304        let rings_field = self.rings_field();
305        let polygons_field = self.polygons_field();
306
307        let nulls = self.nulls;
308        let coord_array = ArrayRef::from(self.coords);
309        let ring_array = Arc::new(GenericListArray::new(
310            vertices_field,
311            self.ring_offsets,
312            coord_array,
313            None,
314        ));
315        let polygons_array = Arc::new(GenericListArray::new(
316            rings_field,
317            self.polygon_offsets,
318            ring_array,
319            None,
320        ));
321        GenericListArray::new(polygons_field, self.geom_offsets, polygons_array, nulls)
322    }
323
324    fn extension_type(&self) -> &Self::ExtensionType {
325        &self.data_type
326    }
327}
328
329impl TryFrom<(&GenericListArray<i32>, MultiPolygonType)> for MultiPolygonArray {
330    type Error = GeoArrowError;
331
332    fn try_from(
333        (geom_array, typ): (&GenericListArray<i32>, MultiPolygonType),
334    ) -> GeoArrowResult<Self> {
335        let geom_offsets = geom_array.offsets();
336        let nulls = geom_array.nulls();
337
338        let polygons_dyn_array = geom_array.values();
339        let polygons_array = polygons_dyn_array.as_list::<i32>();
340
341        let polygon_offsets = polygons_array.offsets();
342        let rings_dyn_array = polygons_array.values();
343        let rings_array = rings_dyn_array.as_list::<i32>();
344
345        let ring_offsets = rings_array.offsets();
346        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
347
348        Ok(Self::new(
349            coords,
350            geom_offsets.clone(),
351            polygon_offsets.clone(),
352            ring_offsets.clone(),
353            nulls.cloned(),
354            typ.metadata().clone(),
355        ))
356    }
357}
358
359impl TryFrom<(&GenericListArray<i64>, MultiPolygonType)> for MultiPolygonArray {
360    type Error = GeoArrowError;
361
362    fn try_from(
363        (geom_array, typ): (&GenericListArray<i64>, MultiPolygonType),
364    ) -> GeoArrowResult<Self> {
365        let geom_offsets = offsets_buffer_i64_to_i32(geom_array.offsets())?;
366        let nulls = geom_array.nulls();
367
368        let polygons_dyn_array = geom_array.values();
369        let polygons_array = polygons_dyn_array.as_list::<i64>();
370
371        let polygon_offsets = offsets_buffer_i64_to_i32(polygons_array.offsets())?;
372        let rings_dyn_array = polygons_array.values();
373        let rings_array = rings_dyn_array.as_list::<i64>();
374
375        let ring_offsets = offsets_buffer_i64_to_i32(rings_array.offsets())?;
376        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
377
378        Ok(Self::new(
379            coords,
380            geom_offsets,
381            polygon_offsets,
382            ring_offsets,
383            nulls.cloned(),
384            typ.metadata().clone(),
385        ))
386    }
387}
388
389impl TryFrom<(&dyn Array, MultiPolygonType)> for MultiPolygonArray {
390    type Error = GeoArrowError;
391
392    fn try_from((value, typ): (&dyn Array, MultiPolygonType)) -> GeoArrowResult<Self> {
393        match value.data_type() {
394            DataType::List(_) => (value.as_list::<i32>(), typ).try_into(),
395            DataType::LargeList(_) => (value.as_list::<i64>(), typ).try_into(),
396            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
397                "Unexpected MultiPolygon DataType: {dt:?}",
398            ))),
399        }
400    }
401}
402
403impl TryFrom<(&dyn Array, &Field)> for MultiPolygonArray {
404    type Error = GeoArrowError;
405
406    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
407        let typ = field.try_extension_type::<MultiPolygonType>()?;
408        (arr, typ).try_into()
409    }
410}
411
412impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, MultiPolygonType)> for MultiPolygonArray {
413    type Error = GeoArrowError;
414
415    fn try_from(value: (GenericWkbArray<O>, MultiPolygonType)) -> GeoArrowResult<Self> {
416        let mut_arr: MultiPolygonBuilder = value.try_into()?;
417        Ok(mut_arr.finish())
418    }
419}
420
421impl From<PolygonArray> for MultiPolygonArray {
422    fn from(value: PolygonArray) -> Self {
423        let (coord_type, dimension, metadata) = value.data_type.into_inner();
424        let new_type = MultiPolygonType::new(dimension, metadata).with_coord_type(coord_type);
425
426        let coords = value.coords;
427        let geom_offsets = OffsetBuffer::from_lengths(vec![1; coords.len()]);
428        let ring_offsets = value.ring_offsets;
429        let polygon_offsets = value.geom_offsets;
430        let nulls = value.nulls;
431        Self {
432            data_type: new_type,
433            coords,
434            geom_offsets,
435            polygon_offsets,
436            ring_offsets,
437            nulls,
438        }
439    }
440}
441
442impl PartialEq for MultiPolygonArray {
443    fn eq(&self, other: &Self) -> bool {
444        self.nulls == other.nulls
445            && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets)
446            && offset_buffer_eq(&self.polygon_offsets, &other.polygon_offsets)
447            && offset_buffer_eq(&self.ring_offsets, &other.ring_offsets)
448            && self.coords == other.coords
449    }
450}
451
452impl GeometryTypeId for MultiPolygonArray {
453    const GEOMETRY_TYPE_OFFSET: i8 = 6;
454
455    fn dimension(&self) -> Dimension {
456        self.data_type.dimension()
457    }
458}
459
#[cfg(test)]
mod test {
    use geo_traits::to_geo::ToGeoMultiPolygon;
    use geoarrow_schema::{CoordType, Dimension};

    use super::*;
    use crate::test::multipolygon;

    /// Every coordinate layout the tests are run against.
    const COORD_TYPES: [CoordType; 2] = [CoordType::Interleaved, CoordType::Separated];

    /// Every dimension the tests are run against.
    const DIMENSIONS: [Dimension; 4] = [
        Dimension::XY,
        Dimension::XYZ,
        Dimension::XYM,
        Dimension::XYZM,
    ];

    /// Building an array from `geo` geometries and reading it back yields the same geometries,
    /// both for the full array and for a slice of it.
    #[test]
    fn geo_round_trip() {
        for coord_type in COORD_TYPES {
            let geoms = [
                Some(multipolygon::mp0()),
                None,
                Some(multipolygon::mp1()),
                None,
            ];
            let typ = MultiPolygonType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let geo_arr = MultiPolygonBuilder::from_nullable_multi_polygons(&geoms, typ).finish();

            for (idx, item) in geo_arr.iter().enumerate() {
                let actual = item.transpose().unwrap().map(|mp| mp.to_multi_polygon());
                assert_eq!(geoms[idx], actual);
            }

            // Test sliced
            let tail = geo_arr.slice(2, 2);
            for (idx, item) in tail.iter().enumerate() {
                let actual = item.transpose().unwrap().map(|mp| mp.to_multi_polygon());
                assert_eq!(geoms[idx + 2], actual);
            }
        }
    }

    /// Converting a fixture array to `geo` geometries and rebuilding it yields an equal array.
    #[test]
    fn geo_round_trip2() {
        for coord_type in COORD_TYPES {
            let geo_arr = multipolygon::array(coord_type, Dimension::XY);
            let geo_geoms: Vec<_> = geo_arr
                .iter()
                .map(|item| item.transpose().unwrap().map(|mp| mp.to_multi_polygon()))
                .collect();

            let typ = MultiPolygonType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let rebuilt =
                MultiPolygonBuilder::from_nullable_multi_polygons(&geo_geoms, typ).finish();
            assert_eq!(geo_arr, rebuilt);
        }
    }

    /// Exporting to Arrow and importing again, via either the extension type or a field, yields
    /// an equal array.
    #[test]
    fn try_from_arrow() {
        for coord_type in COORD_TYPES {
            for dim in DIMENSIONS {
                let geo_arr = multipolygon::array(coord_type, dim);

                let extension_type = geo_arr.extension_type().clone();
                let field = extension_type.to_field("geometry", true);

                let arrow_arr = geo_arr.to_array_ref();

                let from_type =
                    MultiPolygonArray::try_from((arrow_arr.as_ref(), extension_type)).unwrap();
                let from_field = MultiPolygonArray::try_from((arrow_arr.as_ref(), &field)).unwrap();

                assert_eq!(geo_arr, from_type);
                assert_eq!(geo_arr, from_field);
            }
        }
    }

    /// Equality ignores the coordinate layout but detects a difference in length.
    #[test]
    fn partial_eq() {
        for dim in DIMENSIONS {
            let interleaved = multipolygon::array(CoordType::Interleaved, dim);
            let separated = multipolygon::array(CoordType::Separated, dim);
            assert_eq!(interleaved, interleaved);
            assert_eq!(separated, separated);
            assert_eq!(interleaved, separated);

            assert_ne!(interleaved, separated.slice(0, 2));
        }
    }
}