geoarrow_array/array/
geometrycollection.rs

1use std::sync::Arc;
2
3use arrow_array::cast::AsArray;
4use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
5use arrow_buffer::{NullBuffer, OffsetBuffer};
6use arrow_schema::{DataType, Field};
7use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
8use geoarrow_schema::type_id::GeometryTypeId;
9use geoarrow_schema::{CoordType, Dimension, GeoArrowType, GeometryCollectionType, Metadata};
10
11use crate::array::{GenericWkbArray, MixedGeometryArray};
12use crate::builder::GeometryCollectionBuilder;
13use crate::capacity::GeometryCollectionCapacity;
14use crate::eq::offset_buffer_eq;
15use crate::scalar::GeometryCollection;
16use crate::trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow};
17use crate::util::{OffsetBufferUtils, offsets_buffer_i64_to_i32};
18
/// An immutable array of GeometryCollection geometries.
///
/// This is semantically equivalent to `Vec<Option<GeometryCollection>>` due to the internal
/// validity bitmap.
#[derive(Debug, Clone)]
pub struct GeometryCollectionArray {
    /// The GeoArrow extension type of this array (dimension, coordinate type and metadata).
    pub(crate) data_type: GeometryCollectionType,

    /// The flattened child geometries of all collections; `geom_offsets` delimits which run of
    /// children belongs to each collection.
    pub(crate) array: MixedGeometryArray,

    /// Offsets into the mixed geometry array where each geometry starts
    pub(crate) geom_offsets: OffsetBuffer<i32>,

    /// Validity bitmap. When this is `None`, no element is reported as null.
    pub(crate) nulls: Option<NullBuffer>,
}
35
36impl GeometryCollectionArray {
37    /// Create a new GeometryCollectionArray from parts
38    ///
39    /// # Implementation
40    ///
41    /// This function is `O(1)`.
42    pub fn new(
43        array: MixedGeometryArray,
44        geom_offsets: OffsetBuffer<i32>,
45        nulls: Option<NullBuffer>,
46        metadata: Arc<Metadata>,
47    ) -> Self {
48        Self {
49            data_type: GeometryCollectionType::new(array.dim, metadata)
50                .with_coord_type(array.coord_type),
51            array,
52            geom_offsets,
53            nulls,
54        }
55    }
56
57    fn geometries_field(&self) -> Arc<Field> {
58        Field::new("geometries", self.array.storage_type(), false).into()
59    }
60
61    /// The lengths of each buffer contained in this array.
62    pub fn buffer_lengths(&self) -> GeometryCollectionCapacity {
63        GeometryCollectionCapacity::new(
64            self.array.buffer_lengths(),
65            *self.geom_offsets.last() as usize,
66        )
67    }
68
69    /// The number of bytes occupied by this array.
70    pub fn num_bytes(&self) -> usize {
71        let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0);
72        validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension())
73    }
74
75    /// Slice this [`GeometryCollectionArray`].
76    ///
77    /// # Implementation
78    ///
79    /// This operation is `O(1)` as it amounts to increasing a few ref counts.
80    ///
81    /// # Panic
82    /// This function panics iff `offset + length > self.len()`.
83    #[inline]
84    pub fn slice(&self, offset: usize, length: usize) -> Self {
85        assert!(
86            offset + length <= self.len(),
87            "offset + length may not exceed length of array"
88        );
89        // Note: we **only** slice the geom_offsets and not any actual data
90        Self {
91            data_type: self.data_type.clone(),
92            array: self.array.clone(),
93            geom_offsets: self.geom_offsets.slice(offset, length),
94            nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)),
95        }
96    }
97
98    /// Change the [`CoordType`] of this array.
99    pub fn into_coord_type(self, coord_type: CoordType) -> Self {
100        Self {
101            data_type: self.data_type.with_coord_type(coord_type),
102            array: self.array.into_coord_type(coord_type),
103            ..self
104        }
105    }
106
107    /// Change the [`Metadata`] of this array.
108    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
109        Self {
110            data_type: self.data_type.with_metadata(metadata),
111            ..self
112        }
113    }
114}
115
116impl GeoArrowArray for GeometryCollectionArray {
117    fn as_any(&self) -> &dyn std::any::Any {
118        self
119    }
120
121    fn into_array_ref(self) -> ArrayRef {
122        Arc::new(self.into_arrow())
123    }
124
125    fn to_array_ref(&self) -> ArrayRef {
126        self.clone().into_array_ref()
127    }
128
129    #[inline]
130    fn len(&self) -> usize {
131        self.geom_offsets.len_proxy()
132    }
133
134    #[inline]
135    fn logical_nulls(&self) -> Option<NullBuffer> {
136        self.nulls.clone()
137    }
138
139    #[inline]
140    fn logical_null_count(&self) -> usize {
141        self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0)
142    }
143
144    #[inline]
145    fn is_null(&self, i: usize) -> bool {
146        self.nulls
147            .as_ref()
148            .map(|n| n.is_null(i))
149            .unwrap_or_default()
150    }
151
152    fn data_type(&self) -> GeoArrowType {
153        GeoArrowType::GeometryCollection(self.data_type.clone())
154    }
155
156    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray> {
157        Arc::new(self.slice(offset, length))
158    }
159
160    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray> {
161        Arc::new(self.with_metadata(metadata))
162    }
163}
164
165impl<'a> GeoArrowArrayAccessor<'a> for GeometryCollectionArray {
166    type Item = GeometryCollection<'a>;
167
168    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
169        Ok(GeometryCollection::new(
170            &self.array,
171            &self.geom_offsets,
172            index,
173        ))
174    }
175}
176
177impl IntoArrow for GeometryCollectionArray {
178    type ArrowArray = GenericListArray<i32>;
179    type ExtensionType = GeometryCollectionType;
180
181    fn into_arrow(self) -> Self::ArrowArray {
182        let geometries_field = self.geometries_field();
183        let nulls = self.nulls;
184        let values = self.array.into_array_ref();
185        GenericListArray::new(geometries_field, self.geom_offsets, values, nulls)
186    }
187
188    fn extension_type(&self) -> &Self::ExtensionType {
189        &self.data_type
190    }
191}
192
193impl TryFrom<(&GenericListArray<i32>, GeometryCollectionType)> for GeometryCollectionArray {
194    type Error = GeoArrowError;
195
196    fn try_from(
197        (value, typ): (&GenericListArray<i32>, GeometryCollectionType),
198    ) -> GeoArrowResult<Self> {
199        let geoms: MixedGeometryArray =
200            (value.values().as_ref(), typ.dimension(), typ.coord_type()).try_into()?;
201        let geom_offsets = value.offsets();
202        let nulls = value.nulls();
203
204        Ok(Self::new(
205            geoms,
206            geom_offsets.clone(),
207            nulls.cloned(),
208            typ.metadata().clone(),
209        ))
210    }
211}
212
213impl TryFrom<(&GenericListArray<i64>, GeometryCollectionType)> for GeometryCollectionArray {
214    type Error = GeoArrowError;
215
216    fn try_from(
217        (value, typ): (&GenericListArray<i64>, GeometryCollectionType),
218    ) -> GeoArrowResult<Self> {
219        let geoms: MixedGeometryArray =
220            (value.values().as_ref(), typ.dimension(), typ.coord_type()).try_into()?;
221        let geom_offsets = offsets_buffer_i64_to_i32(value.offsets())?;
222        let nulls = value.nulls();
223
224        Ok(Self::new(
225            geoms,
226            geom_offsets,
227            nulls.cloned(),
228            typ.metadata().clone(),
229        ))
230    }
231}
232
233impl TryFrom<(&dyn Array, GeometryCollectionType)> for GeometryCollectionArray {
234    type Error = GeoArrowError;
235
236    fn try_from((value, typ): (&dyn Array, GeometryCollectionType)) -> GeoArrowResult<Self> {
237        match value.data_type() {
238            DataType::List(_) => (value.as_list::<i32>(), typ).try_into(),
239            DataType::LargeList(_) => (value.as_list::<i64>(), typ).try_into(),
240            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
241                "Unexpected GeometryCollection Arrow DataType: {dt:?}"
242            ))),
243        }
244    }
245}
246
247impl TryFrom<(&dyn Array, &Field)> for GeometryCollectionArray {
248    type Error = GeoArrowError;
249
250    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
251        let typ = field.try_extension_type::<GeometryCollectionType>()?;
252        (arr, typ).try_into()
253    }
254}
255
256impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, GeometryCollectionType)>
257    for GeometryCollectionArray
258{
259    type Error = GeoArrowError;
260
261    fn try_from(value: (GenericWkbArray<O>, GeometryCollectionType)) -> GeoArrowResult<Self> {
262        let mut_arr: GeometryCollectionBuilder = value.try_into()?;
263        Ok(mut_arr.finish())
264    }
265}
266
267impl PartialEq for GeometryCollectionArray {
268    fn eq(&self, other: &Self) -> bool {
269        self.nulls == other.nulls
270            && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets)
271            && self.array == other.array
272    }
273}
274
275impl GeometryTypeId for GeometryCollectionArray {
276    const GEOMETRY_TYPE_OFFSET: i8 = 7;
277
278    fn dimension(&self) -> Dimension {
279        self.data_type.dimension()
280    }
281}
282
#[cfg(test)]
mod test {
    use geoarrow_schema::{CoordType, Dimension};
    use geoarrow_test::raw;

    use super::*;
    use crate::test::geometrycollection;

    /// Round-trip through Arrow for every coord type / dimension / `prefer_multi` combination,
    /// reconstructing both from an explicit type and from a field.
    #[test]
    fn try_from_arrow() {
        let coord_types = [CoordType::Interleaved, CoordType::Separated];
        let dims = [
            Dimension::XY,
            Dimension::XYZ,
            Dimension::XYM,
            Dimension::XYZM,
        ];
        let prefer_multi_options = [true, false];

        for coord_type in coord_types {
            for dim in dims {
                for prefer_multi in prefer_multi_options {
                    let original = geometrycollection::array(coord_type, dim, prefer_multi);

                    let collection_type = original.extension_type().clone();
                    let field = collection_type.to_field("geometry", true);

                    let arrow_arr = original.to_array_ref();

                    let from_type: GeometryCollectionArray =
                        (arrow_arr.as_ref(), collection_type).try_into().unwrap();
                    let from_field: GeometryCollectionArray =
                        (arrow_arr.as_ref(), &field).try_into().unwrap();

                    assert_eq!(original, from_type);
                    assert_eq!(original, from_field);
                }
            }
        }
    }

    /// Every `None` in the input must be reported as null by the built array.
    #[test]
    fn test_nullability() {
        let geoms = raw::geometrycollection::xy::geoms();

        let typ = GeometryCollectionType::new(Dimension::XY, Default::default());
        let geo_arr = GeometryCollectionBuilder::from_nullable_geometry_collections(&geoms, typ)
            .unwrap()
            .finish();

        for (idx, geom) in geoms.iter().enumerate() {
            if geom.is_none() {
                assert!(geo_arr.is_null(idx));
            }
        }
    }

    /// The logical null buffer must mirror the `Some`/`None` pattern of the input.
    #[test]
    fn test_logical_nulls() {
        let geoms = raw::geometrycollection::xy::geoms();
        let validity = geoms.iter().map(|geom| geom.is_some());
        let expected_nulls = NullBuffer::from_iter(validity);

        let typ = GeometryCollectionType::new(Dimension::XY, Default::default());
        let builder =
            GeometryCollectionBuilder::from_nullable_geometry_collections(&geoms, typ).unwrap();
        let geo_arr = builder.finish();

        assert_eq!(geo_arr.logical_nulls().unwrap(), expected_nulls);
    }
}