geoarrow_array/array/
geometry.rs

1use std::collections::HashSet;
2use std::sync::Arc;
3
4use arrow_array::cast::AsArray;
5use arrow_array::{Array, ArrayRef, OffsetSizeTrait, UnionArray};
6use arrow_buffer::{NullBuffer, ScalarBuffer};
7use arrow_schema::{ArrowError, DataType, Field, UnionMode};
8use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
9use geoarrow_schema::type_id::GeometryTypeId;
10use geoarrow_schema::{
11    CoordType, Dimension, GeoArrowType, GeometryCollectionType, GeometryType, LineStringType,
12    Metadata, MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType,
13};
14
15use crate::array::*;
16use crate::builder::*;
17use crate::capacity::GeometryCapacity;
18use crate::scalar::Geometry;
19use crate::trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow};
20
/// Macro producing the body of a child-array accessor, with slicing support.
///
/// The parent array is sliced only through its `type_ids` and `offsets` buffers, so a child
/// array may hold more geometries than the parent view references. This macro narrows the child
/// to the part that the parent view still refers to.
///
/// # Arguments
/// * `$self` - The parent array; the macro calls `is_sliced()` on it and reads its `type_ids`
///   and `offsets` buffers
/// * `$geom_arr` - A reference to the child geometry array
///
/// # Returns
/// A clone of the child array when the parent is not sliced; otherwise the child sliced to the
/// range between the first and the last occurrence of its type id in the parent view (an empty
/// slice when the type id does not occur at all).
macro_rules! impl_child_accessor {
    ($self:expr, $geom_arr:expr) => {{
        let child = $geom_arr;
        if $self.is_sliced() {
            // Slow path: locate the first and last row of the view that refer to this child.
            let wanted_type_id = child.geometry_type_id();
            let first_row = $self.type_ids.iter().position(|id| *id == wanted_type_id);
            let last_row = $self
                .type_ids
                .iter()
                .rposition(|id| *id == wanted_type_id);

            match (first_row, last_row) {
                (None, None) => {
                    // No geometry of this type is visible through the sliced view
                    child.slice(0, 0)
                }
                (Some(first), Some(last)) => {
                    // NOTE(review): assumes the offsets of one type id increase within the view;
                    // otherwise the length computation below would underflow — confirm.
                    let start = $self.offsets[first] as usize;
                    let end = $self.offsets[last] as usize;
                    child.slice(start, end - start + 1)
                }
                (Some(first), None) => {
                    unreachable!("Shouldn't happen: found first offset but not last: {first}");
                }
                (None, Some(last)) => {
                    unreachable!("Shouldn't happen: found last offset but not first: {last}");
                }
            }
        } else {
            // Fast path: an unsliced parent references the whole child
            child.clone()
        }
    }};
}
64
/// An immutable array of geometries of unknown geometry type and dimension.
///
// # Invariants
//
// - All arrays must have the same dimension
//   NOTE(review): the struct stores one child array per dimension (XY, XYZ, XYM, XYZM), so this
//   presumably applies to each individual child slot rather than to the struct as a whole —
//   confirm.
// - All arrays must have the same coordinate layout (interleaved or separated)
//
// # Type ids
//
// A type id is `10 * dimension index + geometry type offset`, where the dimension index follows
// the order XY, XYZ, XYM, XYZM and the geometry type offset is the last digit below:
//
// - 1: Point
// - 2: LineString
// - 3: Polygon
// - 4: MultiPoint
// - 5: MultiLineString
// - 6: MultiPolygon
// - 7: GeometryCollection
// - 11: Point Z
// - 12: LineString Z
// - 13: Polygon Z
// - 14: MultiPoint Z
// - 15: MultiLineString Z
// - 16: MultiPolygon Z
// - 17: GeometryCollection Z
// - 21: Point M
// - 22: LineString M
// - 23: Polygon M
// - 24: MultiPoint M
// - 25: MultiLineString M
// - 26: MultiPolygon M
// - 27: GeometryCollection M
// - 31: Point ZM
// - 32: LineString ZM
// - 33: Polygon ZM
// - 34: MultiPoint ZM
// - 35: MultiLineString ZM
// - 36: MultiPolygon ZM
// - 37: GeometryCollection ZM
#[derive(Debug, Clone)]
pub struct GeometryArray {
    /// The GeoArrow type of this array, carrying its coordinate type and metadata.
    pub(crate) data_type: GeometryType,

    /// One type id per geometry, selecting the child array that stores it.
    ///
    /// Invariant: every item in `type_ids` is one of the type ids listed above, i.e.
    /// `type_id / 10` is a valid index into the per-dimension child arrays (`0..4`) and
    /// `type_id % 10` is a geometry type offset in `1..=7`.
    pub(crate) type_ids: ScalarBuffer<i8>,

    /// One offset per geometry: its position inside the child array selected by the type id.
    ///
    /// Invariant: `offsets.len() == type_ids.len()`
    pub(crate) offsets: ScalarBuffer<i32>,

    /// An array of PointArray, ordered XY, XYZ, XYM, XYZM
    pub(crate) points: [PointArray; 4],
    /// An array of LineStringArray, indexed by dimension like `points`
    pub(crate) line_strings: [LineStringArray; 4],
    /// An array of PolygonArray, indexed by dimension like `points`
    pub(crate) polygons: [PolygonArray; 4],
    /// An array of MultiPointArray, indexed by dimension like `points`
    pub(crate) mpoints: [MultiPointArray; 4],
    /// An array of MultiLineStringArray, indexed by dimension like `points`
    pub(crate) mline_strings: [MultiLineStringArray; 4],
    /// An array of MultiPolygonArray, indexed by dimension like `points`
    pub(crate) mpolygons: [MultiPolygonArray; 4],
    /// An array of GeometryCollectionArray, indexed by dimension like `points`
    pub(crate) gcs: [GeometryCollectionArray; 4],
}
120
121impl GeometryArray {
122    /// Create a new GeometryArray from parts
123    ///
124    /// # Implementation
125    ///
126    /// This function is `O(1)`.
127    ///
128    /// # Panics
129    ///
130    /// - if the validity is not `None` and its length is different from the number of geometries
131    /// - if the largest geometry offset does not match the number of coordinates
132    #[allow(clippy::too_many_arguments)]
133    pub fn new(
134        type_ids: ScalarBuffer<i8>,
135        offsets: ScalarBuffer<i32>,
136        points: [PointArray; 4],
137        line_strings: [LineStringArray; 4],
138        polygons: [PolygonArray; 4],
139        mpoints: [MultiPointArray; 4],
140        mline_strings: [MultiLineStringArray; 4],
141        mpolygons: [MultiPolygonArray; 4],
142        gcs: [GeometryCollectionArray; 4],
143        metadata: Arc<Metadata>,
144    ) -> Self {
145        // Validate that all arrays have the same coord type.
146        let mut coord_types = HashSet::new();
147        points.iter().for_each(|arr| {
148            coord_types.insert(arr.data_type.coord_type());
149        });
150        line_strings.iter().for_each(|arr| {
151            coord_types.insert(arr.data_type.coord_type());
152        });
153        polygons.iter().for_each(|arr| {
154            coord_types.insert(arr.data_type.coord_type());
155        });
156        mpoints.iter().for_each(|arr| {
157            coord_types.insert(arr.data_type.coord_type());
158        });
159        mline_strings.iter().for_each(|arr| {
160            coord_types.insert(arr.data_type.coord_type());
161        });
162        mpolygons.iter().for_each(|arr| {
163            coord_types.insert(arr.data_type.coord_type());
164        });
165
166        assert!(coord_types.len() == 1);
167        let coord_type = coord_types.into_iter().next().unwrap();
168
169        Self {
170            data_type: GeometryType::new(metadata).with_coord_type(coord_type),
171            type_ids,
172            offsets,
173            points,
174            line_strings,
175            polygons,
176            mpoints,
177            mline_strings,
178            mpolygons,
179            gcs,
180        }
181    }
182
183    /// The lengths of each buffer contained in this array.
184    pub fn buffer_lengths(&self) -> GeometryCapacity {
185        GeometryCapacity::new(
186            0,
187            core::array::from_fn(|i| self.points[i].buffer_lengths()),
188            core::array::from_fn(|i| self.line_strings[i].buffer_lengths()),
189            core::array::from_fn(|i| self.polygons[i].buffer_lengths()),
190            core::array::from_fn(|i| self.mpoints[i].buffer_lengths()),
191            core::array::from_fn(|i| self.mline_strings[i].buffer_lengths()),
192            core::array::from_fn(|i| self.mpolygons[i].buffer_lengths()),
193            core::array::from_fn(|i| self.gcs[i].buffer_lengths()),
194        )
195    }
196
    /// Returns the `type_ids` buffer for this array
    ///
    /// The buffer holds one `i8` type id per geometry of this (possibly sliced) array; the type
    /// id identifies the child array in which that geometry is stored.
    pub fn type_ids(&self) -> &ScalarBuffer<i8> {
        &self.type_ids
    }
201
    /// Returns the `offsets` buffer for this array
    ///
    /// The buffer holds one `i32` per geometry of this (possibly sliced) array: the position of
    /// that geometry inside the child array selected by the type id at the same index.
    pub fn offsets(&self) -> &ScalarBuffer<i32> {
        &self.offsets
    }
206
207    /// Determine whether this array has been sliced.
208    ///
209    /// This array has been sliced iff the total number of geometries in the child arrays does not
210    /// equal the number of values in the type_ids array.
211    ///
212    /// Since the length of each child array is pre-computed, this operation is O(1).
213    fn is_sliced(&self) -> bool {
214        let mut physical_geom_len = 0;
215        physical_geom_len += self.points.iter().fold(0, |acc, arr| acc + arr.len());
216        physical_geom_len += self.line_strings.iter().fold(0, |acc, arr| acc + arr.len());
217        physical_geom_len += self.polygons.iter().fold(0, |acc, arr| acc + arr.len());
218        physical_geom_len += self.mpoints.iter().fold(0, |acc, arr| acc + arr.len());
219        physical_geom_len += self
220            .mline_strings
221            .iter()
222            .fold(0, |acc, arr| acc + arr.len());
223        physical_geom_len += self.mpolygons.iter().fold(0, |acc, arr| acc + arr.len());
224        physical_geom_len += self.gcs.iter().fold(0, |acc, arr| acc + arr.len());
225
226        physical_geom_len != self.type_ids.len()
227    }
228
    /// Access the PointArray child for the given dimension.
    ///
    /// Note that ordering will be maintained within the child array, but there may have been other
    /// geometries in between in the parent array.
    ///
    /// If this array has not been sliced, the child is returned as a clone. If it has been
    /// sliced, the child is narrowed to the range between the first and the last geometry of this
    /// type still referenced by `type_ids`; an empty array is returned when none is referenced.
    pub fn point_child(&self, dim: Dimension) -> PointArray {
        impl_child_accessor!(self, &self.points[dim.order()])
    }
236
    /// Access the LineStringArray child for the given dimension.
    ///
    /// Note that ordering will be maintained within the child array, but there may have been other
    /// geometries in between in the parent array.
    ///
    /// If this array has not been sliced, the child is returned as a clone. If it has been
    /// sliced, the child is narrowed to the range between the first and the last geometry of this
    /// type still referenced by `type_ids`; an empty array is returned when none is referenced.
    pub fn line_string_child(&self, dim: Dimension) -> LineStringArray {
        impl_child_accessor!(self, &self.line_strings[dim.order()])
    }
244
    /// Access the PolygonArray child for the given dimension.
    ///
    /// Note that ordering will be maintained within the child array, but there may have been other
    /// geometries in between in the parent array.
    ///
    /// If this array has not been sliced, the child is returned as a clone. If it has been
    /// sliced, the child is narrowed to the range between the first and the last geometry of this
    /// type still referenced by `type_ids`; an empty array is returned when none is referenced.
    pub fn polygon_child(&self, dim: Dimension) -> PolygonArray {
        impl_child_accessor!(self, &self.polygons[dim.order()])
    }
252
    /// Access the MultiPointArray child for the given dimension.
    ///
    /// Note that ordering will be maintained within the child array, but there may have been other
    /// geometries in between in the parent array.
    ///
    /// If this array has not been sliced, the child is returned as a clone. If it has been
    /// sliced, the child is narrowed to the range between the first and the last geometry of this
    /// type still referenced by `type_ids`; an empty array is returned when none is referenced.
    pub fn multi_point_child(&self, dim: Dimension) -> MultiPointArray {
        impl_child_accessor!(self, &self.mpoints[dim.order()])
    }
260
    /// Access the MultiLineStringArray child for the given dimension.
    ///
    /// Note that ordering will be maintained within the child array, but there may have been other
    /// geometries in between in the parent array.
    ///
    /// If this array has not been sliced, the child is returned as a clone. If it has been
    /// sliced, the child is narrowed to the range between the first and the last geometry of this
    /// type still referenced by `type_ids`; an empty array is returned when none is referenced.
    pub fn multi_line_string_child(&self, dim: Dimension) -> MultiLineStringArray {
        impl_child_accessor!(self, &self.mline_strings[dim.order()])
    }
268
    /// Access the MultiPolygonArray child for the given dimension.
    ///
    /// Note that ordering will be maintained within the child array, but there may have been other
    /// geometries in between in the parent array.
    ///
    /// If this array has not been sliced, the child is returned as a clone. If it has been
    /// sliced, the child is narrowed to the range between the first and the last geometry of this
    /// type still referenced by `type_ids`; an empty array is returned when none is referenced.
    pub fn multi_polygon_child(&self, dim: Dimension) -> MultiPolygonArray {
        impl_child_accessor!(self, &self.mpolygons[dim.order()])
    }
276
    /// Access the GeometryCollectionArray child for the given dimension.
    ///
    /// Note that ordering will be maintained within the child array, but there may have been other
    /// geometries in between in the parent array.
    ///
    /// If this array has not been sliced, the child is returned as a clone. If it has been
    /// sliced, the child is narrowed to the range between the first and the last geometry of this
    /// type still referenced by `type_ids`; an empty array is returned when none is referenced.
    pub fn geometry_collection_child(&self, dim: Dimension) -> GeometryCollectionArray {
        impl_child_accessor!(self, &self.gcs[dim.order()])
    }
284
    // TODO: handle slicing
    /// Return `true` if the PointArray child for `dim` is non-empty.
    ///
    /// Only the child array itself is inspected; a sliced parent may no longer reference any of
    /// the child's geometries and this still returns `true`.
    pub(crate) fn has_points(&self, dim: Dimension) -> bool {
        !self.points[dim.order()].is_empty()
    }
289
    /// Return `true` if the LineStringArray child for `dim` is non-empty.
    ///
    /// Only the child array itself is inspected; slicing of the parent is not taken into account.
    pub(crate) fn has_line_strings(&self, dim: Dimension) -> bool {
        !self.line_strings[dim.order()].is_empty()
    }
293
    /// Return `true` if the PolygonArray child for `dim` is non-empty.
    ///
    /// Only the child array itself is inspected; slicing of the parent is not taken into account.
    pub(crate) fn has_polygons(&self, dim: Dimension) -> bool {
        !self.polygons[dim.order()].is_empty()
    }
297
    /// Return `true` if the MultiPointArray child for `dim` is non-empty.
    ///
    /// Only the child array itself is inspected; slicing of the parent is not taken into account.
    pub(crate) fn has_multi_points(&self, dim: Dimension) -> bool {
        !self.mpoints[dim.order()].is_empty()
    }
301
    /// Return `true` if the MultiLineStringArray child for `dim` is non-empty.
    ///
    /// Only the child array itself is inspected; slicing of the parent is not taken into account.
    pub(crate) fn has_multi_line_strings(&self, dim: Dimension) -> bool {
        !self.mline_strings[dim.order()].is_empty()
    }
305
    /// Return `true` if the MultiPolygonArray child for `dim` is non-empty.
    ///
    /// Only the child array itself is inspected; slicing of the parent is not taken into account.
    pub(crate) fn has_multi_polygons(&self, dim: Dimension) -> bool {
        !self.mpolygons[dim.order()].is_empty()
    }
309
    /// Return `true` if the GeometryCollectionArray child for `dim` is non-empty.
    ///
    /// Only the child array itself is inspected; slicing of the parent is not taken into account.
    #[allow(dead_code)]
    pub(crate) fn has_geometry_collections(&self, dim: Dimension) -> bool {
        !self.gcs[dim.order()].is_empty()
    }
314
315    /// Return `true` if this array holds at least one non-empty array of the given dimension
316    pub fn has_dimension(&self, dim: Dimension) -> bool {
317        self.has_points(dim)
318            || self.has_line_strings(dim)
319            || self.has_polygons(dim)
320            || self.has_multi_points(dim)
321            || self.has_multi_line_strings(dim)
322            || self.has_multi_polygons(dim)
323    }
324
325    /// Return `true` if this array holds at least one geometry array of the given dimension and no
326    /// arrays of any other dimension.
327    pub fn has_only_dimension(&self, dim: Dimension) -> bool {
328        use Dimension::*;
329        let existant_dims = [
330            self.has_dimension(XY),
331            self.has_dimension(XYZ),
332            self.has_dimension(XYM),
333            self.has_dimension(XYZM),
334        ];
335        existant_dims.iter().map(|b| *b as u8).sum::<u8>() == 1 && existant_dims[dim.order()]
336    }
337
    /// The number of bytes occupied by this array.
    ///
    /// Derived from [`buffer_lengths`](Self::buffer_lengths), i.e. from the buffer lengths of
    /// the child arrays.
    pub fn num_bytes(&self) -> usize {
        self.buffer_lengths().num_bytes()
    }
342
343    /// Slice this [`GeometryArray`].
344    ///
345    /// # Implementation
346    ///
347    /// This operation is `O(F)` where `F` is the number of fields.
348    ///
349    /// # Panic
350    ///
351    /// This function panics iff `offset + length > self.len()`.
352    #[inline]
353    pub fn slice(&self, offset: usize, length: usize) -> Self {
354        assert!(
355            offset + length <= self.len(),
356            "offset + length may not exceed length of array"
357        );
358        Self {
359            data_type: self.data_type.clone(),
360            type_ids: self.type_ids.slice(offset, length),
361            offsets: self.offsets.slice(offset, length),
362
363            points: self.points.clone(),
364            line_strings: self.line_strings.clone(),
365            polygons: self.polygons.clone(),
366            mpoints: self.mpoints.clone(),
367            mline_strings: self.mline_strings.clone(),
368            mpolygons: self.mpolygons.clone(),
369            gcs: self.gcs.clone(),
370        }
371    }
372
373    /// Change the [`CoordType`] of this array.
374    pub fn into_coord_type(self, coord_type: CoordType) -> Self {
375        Self {
376            data_type: self.data_type.with_coord_type(coord_type),
377            points: self.points.map(|arr| arr.into_coord_type(coord_type)),
378            line_strings: self.line_strings.map(|arr| arr.into_coord_type(coord_type)),
379            polygons: self.polygons.map(|arr| arr.into_coord_type(coord_type)),
380            mpoints: self.mpoints.map(|arr| arr.into_coord_type(coord_type)),
381            mline_strings: self
382                .mline_strings
383                .map(|arr| arr.into_coord_type(coord_type)),
384            mpolygons: self.mpolygons.map(|arr| arr.into_coord_type(coord_type)),
385            gcs: self.gcs.map(|arr| arr.into_coord_type(coord_type)),
386            ..self
387        }
388    }
389
390    /// Change the [`Metadata`] of this array.
391    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
392        Self {
393            data_type: self.data_type.with_metadata(metadata),
394            ..self
395        }
396    }
397
398    // TODO: recursively expand the types from the geometry collection array
399    #[allow(dead_code)]
400    pub(crate) fn contained_types(&self) -> HashSet<GeoArrowType> {
401        let mut types = HashSet::new();
402        self.points.iter().for_each(|arr| {
403            if !arr.is_empty() {
404                types.insert(arr.data_type());
405            }
406        });
407        self.line_strings.iter().for_each(|arr| {
408            if !arr.is_empty() {
409                types.insert(arr.data_type());
410            }
411        });
412        self.polygons.iter().for_each(|arr| {
413            if !arr.is_empty() {
414                types.insert(arr.data_type());
415            }
416        });
417        self.mpoints.iter().for_each(|arr| {
418            if !arr.is_empty() {
419                types.insert(arr.data_type());
420            }
421        });
422        self.mline_strings.iter().for_each(|arr| {
423            if !arr.is_empty() {
424                types.insert(arr.data_type());
425            }
426        });
427        self.mpolygons.iter().for_each(|arr| {
428            if !arr.is_empty() {
429                types.insert(arr.data_type());
430            }
431        });
432        self.gcs.iter().for_each(|arr| {
433            if !arr.is_empty() {
434                types.insert(arr.data_type());
435            }
436        });
437
438        types
439    }
440}
441
442impl GeoArrowArray for GeometryArray {
443    fn as_any(&self) -> &dyn std::any::Any {
444        self
445    }
446
447    fn into_array_ref(self) -> ArrayRef {
448        Arc::new(self.into_arrow())
449    }
450
451    fn to_array_ref(&self) -> ArrayRef {
452        self.clone().into_array_ref()
453    }
454
455    #[inline]
456    fn len(&self) -> usize {
457        // Note that `type_ids` is sliced as usual, and thus always has the correct length.
458        self.type_ids.len()
459    }
460
461    #[inline]
462    fn logical_nulls(&self) -> Option<NullBuffer> {
463        self.to_array_ref().logical_nulls()
464    }
465
466    #[inline]
467    fn logical_null_count(&self) -> usize {
468        self.to_array_ref().logical_null_count()
469    }
470
471    #[inline]
472    fn is_null(&self, i: usize) -> bool {
473        let type_id = self.type_ids[i];
474        let offset = self.offsets[i] as usize;
475        let dim = (type_id / 10) as usize;
476        match type_id % 10 {
477            PointType::GEOMETRY_TYPE_OFFSET => self.points[dim].is_null(offset),
478            LineStringType::GEOMETRY_TYPE_OFFSET => self.line_strings[dim].is_null(offset),
479            PolygonType::GEOMETRY_TYPE_OFFSET => self.polygons[dim].is_null(offset),
480            MultiPointType::GEOMETRY_TYPE_OFFSET => self.mpoints[dim].is_null(offset),
481            MultiLineStringType::GEOMETRY_TYPE_OFFSET => self.mline_strings[dim].is_null(offset),
482            MultiPolygonType::GEOMETRY_TYPE_OFFSET => self.mpolygons[dim].is_null(offset),
483            GeometryCollectionType::GEOMETRY_TYPE_OFFSET => self.gcs[dim].is_null(offset),
484            _ => unreachable!("unknown type_id {}", type_id),
485        }
486    }
487
488    fn data_type(&self) -> GeoArrowType {
489        GeoArrowType::Geometry(self.data_type.clone())
490    }
491
492    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray> {
493        Arc::new(self.slice(offset, length))
494    }
495
496    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray> {
497        Arc::new(self.with_metadata(metadata))
498    }
499}
500
501impl<'a> GeoArrowArrayAccessor<'a> for GeometryArray {
502    type Item = Geometry<'a>;
503
504    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
505        let type_id = self.type_ids[index];
506        let offset = self.offsets[index] as usize;
507
508        let dim = (type_id / 10) as usize;
509
510        let result = match type_id % 10 {
511            PointType::GEOMETRY_TYPE_OFFSET => Geometry::Point(self.points[dim].value(offset)?),
512            LineStringType::GEOMETRY_TYPE_OFFSET => {
513                Geometry::LineString(self.line_strings[dim].value(offset)?)
514            }
515            PolygonType::GEOMETRY_TYPE_OFFSET => {
516                Geometry::Polygon(self.polygons[dim].value(offset)?)
517            }
518            MultiPointType::GEOMETRY_TYPE_OFFSET => {
519                Geometry::MultiPoint(self.mpoints[dim].value(offset)?)
520            }
521            MultiLineStringType::GEOMETRY_TYPE_OFFSET => {
522                Geometry::MultiLineString(self.mline_strings[dim].value(offset)?)
523            }
524            MultiPolygonType::GEOMETRY_TYPE_OFFSET => {
525                Geometry::MultiPolygon(self.mpolygons[dim].value(offset)?)
526            }
527            GeometryCollectionType::GEOMETRY_TYPE_OFFSET => {
528                Geometry::GeometryCollection(self.gcs[dim].value(offset)?)
529            }
530            _ => unreachable!("unknown type_id {}", type_id),
531        };
532        Ok(result)
533    }
534}
535
536impl IntoArrow for GeometryArray {
537    type ArrowArray = UnionArray;
538    type ExtensionType = GeometryType;
539
540    fn into_arrow(self) -> Self::ArrowArray {
541        let union_fields = match self.data_type.data_type() {
542            DataType::Union(union_fields, _) => union_fields,
543            _ => unreachable!(),
544        };
545
546        // https://stackoverflow.com/a/34406459/7319250
547        let mut child_arrays: Vec<Option<ArrayRef>> = vec![None; 28];
548        for (i, arr) in self.points.into_iter().enumerate() {
549            child_arrays[i * 7] = Some(arr.into_array_ref());
550        }
551        for (i, arr) in self.line_strings.into_iter().enumerate() {
552            child_arrays[i * 7 + 1] = Some(arr.into_array_ref());
553        }
554        for (i, arr) in self.polygons.into_iter().enumerate() {
555            child_arrays[i * 7 + 2] = Some(arr.into_array_ref());
556        }
557        for (i, arr) in self.mpoints.into_iter().enumerate() {
558            child_arrays[i * 7 + 3] = Some(arr.into_array_ref());
559        }
560        for (i, arr) in self.mline_strings.into_iter().enumerate() {
561            child_arrays[i * 7 + 4] = Some(arr.into_array_ref());
562        }
563        for (i, arr) in self.mpolygons.into_iter().enumerate() {
564            child_arrays[i * 7 + 5] = Some(arr.into_array_ref());
565        }
566        for (i, arr) in self.gcs.into_iter().enumerate() {
567            child_arrays[i * 7 + 6] = Some(arr.into_array_ref());
568        }
569
570        UnionArray::try_new(
571            union_fields,
572            self.type_ids,
573            Some(self.offsets),
574            child_arrays.into_iter().map(|x| x.unwrap()).collect(),
575        )
576        .unwrap()
577    }
578
579    fn extension_type(&self) -> &Self::ExtensionType {
580        &self.data_type
581    }
582}
583
584impl TryFrom<(&UnionArray, GeometryType)> for GeometryArray {
585    type Error = GeoArrowError;
586
587    fn try_from((value, typ): (&UnionArray, GeometryType)) -> GeoArrowResult<Self> {
588        let mut points: [Option<PointArray>; 4] = Default::default();
589        let mut line_strings: [Option<LineStringArray>; 4] = Default::default();
590        let mut polygons: [Option<PolygonArray>; 4] = Default::default();
591        let mut mpoints: [Option<MultiPointArray>; 4] = Default::default();
592        let mut mline_strings: [Option<MultiLineStringArray>; 4] = Default::default();
593        let mut mpolygons: [Option<MultiPolygonArray>; 4] = Default::default();
594        let mut gcs: [Option<GeometryCollectionArray>; 4] = Default::default();
595
596        let coord_type = typ.coord_type();
597        let metadata = typ.metadata().clone();
598
599        // Note: From the spec:
600        //
601        // The child arrays should not themselves contain GeoArrow metadata. Only the top-level
602        // geometry array should contain GeoArrow metadata.
603        match value.data_type() {
604            DataType::Union(fields, mode) => {
605                if !matches!(mode, UnionMode::Dense) {
606                    return Err(ArrowError::SchemaError("Expected dense union".to_string()).into());
607                }
608
609                for (type_id, _field) in fields.iter() {
610                    let dim = Dimension::from_order((type_id / 10) as _)?;
611                    let index = dim.order();
612
613                    match type_id % 10 {
614                        1 => {
615                            points[index] = Some(
616                                (
617                                    value.child(type_id).as_ref(),
618                                    PointType::new(dim, Default::default())
619                                        .with_coord_type(coord_type),
620                                )
621                                    .try_into()?,
622                            );
623                        }
624                        2 => {
625                            line_strings[index] = Some(
626                                (
627                                    value.child(type_id).as_ref(),
628                                    LineStringType::new(dim, Default::default())
629                                        .with_coord_type(coord_type),
630                                )
631                                    .try_into()?,
632                            );
633                        }
634                        3 => {
635                            polygons[index] = Some(
636                                (
637                                    value.child(type_id).as_ref(),
638                                    PolygonType::new(dim, Default::default())
639                                        .with_coord_type(coord_type),
640                                )
641                                    .try_into()?,
642                            );
643                        }
644                        4 => {
645                            mpoints[index] = Some(
646                                (
647                                    value.child(type_id).as_ref(),
648                                    MultiPointType::new(dim, Default::default())
649                                        .with_coord_type(coord_type),
650                                )
651                                    .try_into()?,
652                            );
653                        }
654                        5 => {
655                            mline_strings[index] = Some(
656                                (
657                                    value.child(type_id).as_ref(),
658                                    MultiLineStringType::new(dim, Default::default())
659                                        .with_coord_type(coord_type),
660                                )
661                                    .try_into()?,
662                            );
663                        }
664                        6 => {
665                            mpolygons[index] = Some(
666                                (
667                                    value.child(type_id).as_ref(),
668                                    MultiPolygonType::new(dim, Default::default())
669                                        .with_coord_type(coord_type),
670                                )
671                                    .try_into()?,
672                            );
673                        }
674                        7 => {
675                            gcs[index] = Some(
676                                (
677                                    value.child(type_id).as_ref(),
678                                    GeometryCollectionType::new(dim, Default::default())
679                                        .with_coord_type(coord_type),
680                                )
681                                    .try_into()?,
682                            );
683                        }
684                        _ => {
685                            return Err(GeoArrowError::InvalidGeoArrow(format!(
686                                "Unexpected type_id when converting to GeometryArray {type_id}",
687                            )));
688                        }
689                    }
690                }
691            }
692            _ => {
693                return Err(GeoArrowError::InvalidGeoArrow(
694                    "expected union type when converting to GeometryArray".to_string(),
695                ));
696            }
697        };
698
699        let type_ids = value.type_ids().clone();
700        // This is after checking for dense union
701        let offsets = value.offsets().unwrap().clone();
702
703        // We need to convert the array [Option<PointArray>; 4] into `[PointArray; 4]`.
704        // But we also need to ensure the underlying PointArray has the correct `Dimension` for the
705        // given array index.
706        // In order to do this, we need the index of the array, which `map` doesn't give us. And
707        // using `core::array::from_fn` doesn't let us move out of the existing array.
708        // So we mutate the existing array of `[Option<PointArray>; 4]` to ensure all values are
709        // `Some`, and then later we call `unwrap` on all array values in a `map`.
710        points.iter_mut().enumerate().for_each(|(i, arr)| {
711            let new_val = if let Some(arr) = arr.take() {
712                arr
713            } else {
714                PointBuilder::new(
715                    PointType::new(Dimension::from_order(i).unwrap(), Default::default())
716                        .with_coord_type(coord_type),
717                )
718                .finish()
719            };
720            arr.replace(new_val);
721        });
722        line_strings.iter_mut().enumerate().for_each(|(i, arr)| {
723            let new_val = if let Some(arr) = arr.take() {
724                arr
725            } else {
726                LineStringBuilder::new(
727                    LineStringType::new(Dimension::from_order(i).unwrap(), Default::default())
728                        .with_coord_type(coord_type),
729                )
730                .finish()
731            };
732            arr.replace(new_val);
733        });
734        polygons.iter_mut().enumerate().for_each(|(i, arr)| {
735            let new_val = if let Some(arr) = arr.take() {
736                arr
737            } else {
738                PolygonBuilder::new(
739                    PolygonType::new(Dimension::from_order(i).unwrap(), Default::default())
740                        .with_coord_type(coord_type),
741                )
742                .finish()
743            };
744            arr.replace(new_val);
745        });
746        mpoints.iter_mut().enumerate().for_each(|(i, arr)| {
747            let new_val = if let Some(arr) = arr.take() {
748                arr
749            } else {
750                MultiPointBuilder::new(
751                    MultiPointType::new(Dimension::from_order(i).unwrap(), Default::default())
752                        .with_coord_type(coord_type),
753                )
754                .finish()
755            };
756            arr.replace(new_val);
757        });
758        mline_strings.iter_mut().enumerate().for_each(|(i, arr)| {
759            let new_val = if let Some(arr) = arr.take() {
760                arr
761            } else {
762                MultiLineStringBuilder::new(
763                    MultiLineStringType::new(Dimension::from_order(i).unwrap(), Default::default())
764                        .with_coord_type(coord_type),
765                )
766                .finish()
767            };
768            arr.replace(new_val);
769        });
770        mpolygons.iter_mut().enumerate().for_each(|(i, arr)| {
771            let new_val = if let Some(arr) = arr.take() {
772                arr
773            } else {
774                MultiPolygonBuilder::new(
775                    MultiPolygonType::new(Dimension::from_order(i).unwrap(), Default::default())
776                        .with_coord_type(coord_type),
777                )
778                .finish()
779            };
780            arr.replace(new_val);
781        });
782        gcs.iter_mut().enumerate().for_each(|(i, arr)| {
783            let new_val = if let Some(arr) = arr.take() {
784                arr
785            } else {
786                GeometryCollectionBuilder::new(
787                    GeometryCollectionType::new(
788                        Dimension::from_order(i).unwrap(),
789                        Default::default(),
790                    )
791                    .with_coord_type(coord_type),
792                )
793                .finish()
794            };
795            arr.replace(new_val);
796        });
797
798        Ok(Self::new(
799            type_ids,
800            offsets,
801            points.map(|x| x.unwrap()),
802            line_strings.map(|x| x.unwrap()),
803            polygons.map(|x| x.unwrap()),
804            mpoints.map(|x| x.unwrap()),
805            mline_strings.map(|x| x.unwrap()),
806            mpolygons.map(|x| x.unwrap()),
807            gcs.map(|x| x.unwrap()),
808            metadata,
809        ))
810    }
811}
812
813impl TryFrom<(&dyn Array, GeometryType)> for GeometryArray {
814    type Error = GeoArrowError;
815
816    fn try_from((value, typ): (&dyn Array, GeometryType)) -> GeoArrowResult<Self> {
817        match value.data_type() {
818            DataType::Union(_, _) => (value.as_union(), typ).try_into(),
819            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
820                "Unexpected GeometryArray DataType: {dt:?}",
821            ))),
822        }
823    }
824}
825
826impl TryFrom<(&dyn Array, &Field)> for GeometryArray {
827    type Error = GeoArrowError;
828
829    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
830        let typ = field.try_extension_type::<GeometryType>()?;
831        (arr, typ).try_into()
832    }
833}
834
835impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, GeometryType)> for GeometryArray {
836    type Error = GeoArrowError;
837
838    fn try_from(value: (GenericWkbArray<O>, GeometryType)) -> GeoArrowResult<Self> {
839        let mut_arr: GeometryBuilder = value.try_into()?;
840        Ok(mut_arr.finish())
841    }
842}
843
/// Maps a value to and from the positional index used to pick one of the per-dimension
/// child arrays.
pub(crate) trait DimensionIndex: Sized {
    /// Get the positional index of the internal array for the given dimension.
    fn order(&self) -> usize;

    /// Build a value from a positional index, intended as the inverse of [`Self::order`].
    ///
    /// Returns an error when `index` does not correspond to any value.
    fn from_order(index: usize) -> GeoArrowResult<Self>;
}
850
851impl DimensionIndex for Dimension {
852    fn order(&self) -> usize {
853        match self {
854            Self::XY => 0,
855            Self::XYZ => 1,
856            Self::XYM => 2,
857            Self::XYZM => 3,
858        }
859    }
860
861    fn from_order(index: usize) -> GeoArrowResult<Self> {
862        match index {
863            0 => Ok(Self::XY),
864            1 => Ok(Self::XYZ),
865            2 => Ok(Self::XYM),
866            3 => Ok(Self::XYZM),
867            i => {
868                Err(ArrowError::SchemaError(format!("unsupported index in from_order: {i}")).into())
869            }
870        }
871    }
872}
873
874impl PartialEq for GeometryArray {
875    fn eq(&self, other: &Self) -> bool {
876        self.type_ids == other.type_ids
877            && self.offsets == other.offsets
878            && self.points == other.points
879            && self.line_strings == other.line_strings
880            && self.polygons == other.polygons
881            && self.mpoints == other.mpoints
882            && self.mline_strings == other.mline_strings
883            && self.mpolygons == other.mpolygons
884            && self.gcs == other.gcs
885    }
886}
887
/// The full set of child arrays of a `GeometryArray`, grouped by geometry kind: points, line
/// strings, polygons, multi points, multi line strings, multi polygons and geometry
/// collections.
///
/// Every group holds four arrays, one per dimension; `empty_children` fills slot `i` with the
/// dimension returned by `Dimension::from_order(i)`.
type ChildrenArrays = (
    [PointArray; 4],
    [LineStringArray; 4],
    [PolygonArray; 4],
    [MultiPointArray; 4],
    [MultiLineStringArray; 4],
    [MultiPolygonArray; 4],
    [GeometryCollectionArray; 4],
);
897
898/// Initialize empty children with the given coord type.
899///
900/// This is used in the impls like `From<PointArray> for GeometryArray`. This lets us initialize
901/// all empty children and then just swap in the one array that's valid.
902fn empty_children(coord_type: CoordType) -> ChildrenArrays {
903    (
904        core::array::from_fn(|i| {
905            PointBuilder::new(
906                PointType::new(Dimension::from_order(i).unwrap(), Default::default())
907                    .with_coord_type(coord_type),
908            )
909            .finish()
910        }),
911        core::array::from_fn(|i| {
912            LineStringBuilder::new(
913                LineStringType::new(Dimension::from_order(i).unwrap(), Default::default())
914                    .with_coord_type(coord_type),
915            )
916            .finish()
917        }),
918        core::array::from_fn(|i| {
919            PolygonBuilder::new(
920                PolygonType::new(Dimension::from_order(i).unwrap(), Default::default())
921                    .with_coord_type(coord_type),
922            )
923            .finish()
924        }),
925        core::array::from_fn(|i| {
926            MultiPointBuilder::new(
927                MultiPointType::new(Dimension::from_order(i).unwrap(), Default::default())
928                    .with_coord_type(coord_type),
929            )
930            .finish()
931        }),
932        core::array::from_fn(|i| {
933            MultiLineStringBuilder::new(
934                MultiLineStringType::new(Dimension::from_order(i).unwrap(), Default::default())
935                    .with_coord_type(coord_type),
936            )
937            .finish()
938        }),
939        core::array::from_fn(|i| {
940            MultiPolygonBuilder::new(
941                MultiPolygonType::new(Dimension::from_order(i).unwrap(), Default::default())
942                    .with_coord_type(coord_type),
943            )
944            .finish()
945        }),
946        core::array::from_fn(|i| {
947            GeometryCollectionBuilder::new(
948                GeometryCollectionType::new(Dimension::from_order(i).unwrap(), Default::default())
949                    .with_coord_type(coord_type),
950            )
951            .finish()
952        }),
953    )
954}
955
/// Macro to implement `From<$source_array> for GeometryArray` for a single-type geometry array.
///
/// The generated conversion tags every row with the source array's geometry type id, numbers
/// the offsets `0..len`, moves the source array into its own slot and leaves every other child
/// array empty. Coordinate type and metadata are carried over from the source array.
///
/// # Arguments
/// * `$source_array` - The concrete array type to convert from
/// * `$value_edit` - The tuple index of that array kind within `ChildrenArrays`
///
/// # Panics
/// The generated `from` panics when the source array has more than `i32::MAX` rows, because
/// the offsets are stored as `i32`. A plain `as i32` cast would wrap silently and produce an
/// offsets buffer whose length no longer matches the type ids.
macro_rules! impl_primitive_cast {
    ($source_array:ty, $value_edit:tt) => {
        impl From<$source_array> for GeometryArray {
            fn from(value: $source_array) -> Self {
                let coord_type = value.data_type.coord_type();
                let dim = value.data_type.dimension();
                let metadata = value.data_type.metadata().clone();

                let len = value.len();
                // Checked conversion: offsets are `i32`, so longer arrays cannot be indexed.
                let offsets_end = i32::try_from(len)
                    .expect("array length must fit in i32 to be used as union offsets");

                let type_ids = vec![value.geometry_type_id(); len].into();
                let offsets = ScalarBuffer::from_iter(0..offsets_end);
                let data_type = GeometryType::new(metadata).with_coord_type(coord_type);
                let mut children = empty_children(coord_type);

                // Swap the real data into the slot for this geometry kind and dimension.
                children.$value_edit[dim.order()] = value;
                Self {
                    data_type,
                    type_ids,
                    offsets,
                    points: children.0,
                    line_strings: children.1,
                    polygons: children.2,
                    mpoints: children.3,
                    mline_strings: children.4,
                    mpolygons: children.5,
                    gcs: children.6,
                }
            }
        }
    };
}
986
// One `From<...> for GeometryArray` impl per single-type array. The second argument is the
// tuple index of the matching group of child arrays in `ChildrenArrays`.
impl_primitive_cast!(PointArray, 0);
impl_primitive_cast!(LineStringArray, 1);
impl_primitive_cast!(PolygonArray, 2);
impl_primitive_cast!(MultiPointArray, 3);
impl_primitive_cast!(MultiLineStringArray, 4);
impl_primitive_cast!(MultiPolygonArray, 5);
impl_primitive_cast!(GeometryCollectionArray, 6);
994
995#[cfg(test)]
996mod test {
997    use ::wkt::{Wkt, wkt};
998    use geo_traits::to_geo::ToGeoGeometry;
999    use geoarrow_schema::Crs;
1000    use geoarrow_test::raw;
1001
1002    use super::*;
1003    use crate::test::{linestring, multilinestring, multipoint, multipolygon, point, polygon};
1004
    /// Fixture geometries shared by the tests below: three points followed by two each of
    /// line strings, polygons, multi points, multi line strings and multi polygons.
    fn geoms() -> Vec<geo_types::Geometry> {
        vec![
            point::p0().into(),
            point::p1().into(),
            point::p2().into(),
            linestring::ls0().into(),
            linestring::ls1().into(),
            polygon::p0().into(),
            polygon::p1().into(),
            multipoint::mp0().into(),
            multipoint::mp1().into(),
            multilinestring::ml0().into(),
            multilinestring::ml1().into(),
            multipolygon::mp0().into(),
            multipolygon::mp1().into(),
        ]
    }
1022
1023    fn geom_array(coord_type: CoordType) -> GeometryArray {
1024        let geoms = geoms().into_iter().map(Some).collect::<Vec<_>>();
1025        let typ = GeometryType::new(Default::default()).with_coord_type(coord_type);
1026        GeometryBuilder::from_nullable_geometries(&geoms, typ)
1027            .unwrap()
1028            .finish()
1029    }
1030
1031    #[test]
1032    fn test_2d() {
1033        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
1034            let geoms = geoms();
1035            let geometry_array = geom_array(coord_type);
1036            let geoms_again = geometry_array
1037                .iter_values()
1038                .map(|g| g.unwrap().to_geometry())
1039                .collect::<Vec<_>>();
1040            assert_eq!(geoms, geoms_again);
1041        }
1042    }
1043
1044    #[test]
1045    fn test_2d_roundtrip_arrow() {
1046        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
1047            let geoms = geoms();
1048            let geometry_array = geom_array(coord_type);
1049            let field = geometry_array.data_type.to_field("geometry", true);
1050            let union_array = geometry_array.into_arrow();
1051
1052            let geometry_array_again =
1053                GeometryArray::try_from((&union_array as _, &field)).unwrap();
1054            let geoms_again = geometry_array_again
1055                .iter_values()
1056                .map(|g| g.unwrap().to_geometry())
1057                .collect::<Vec<_>>();
1058            assert_eq!(geoms, geoms_again);
1059        }
1060    }
1061
1062    #[test]
1063    fn try_from_arrow() {
1064        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
1065            for prefer_multi in [true, false] {
1066                let geo_arr = crate::test::geometry::array(coord_type, prefer_multi);
1067
1068                let point_type = geo_arr.extension_type().clone();
1069                let field = point_type.to_field("geometry", true);
1070
1071                let arrow_arr = geo_arr.to_array_ref();
1072
1073                let geo_arr2: GeometryArray = (arrow_arr.as_ref(), point_type).try_into().unwrap();
1074                let geo_arr3: GeometryArray = (arrow_arr.as_ref(), &field).try_into().unwrap();
1075
1076                assert_eq!(geo_arr, geo_arr2);
1077                assert_eq!(geo_arr, geo_arr3);
1078            }
1079        }
1080    }
1081
1082    #[test]
1083    fn test_nullability() {
1084        let geoms = raw::geometry::geoms();
1085        let null_idxs = geoms
1086            .iter()
1087            .enumerate()
1088            .filter_map(|(i, geom)| if geom.is_none() { Some(i) } else { None })
1089            .collect::<Vec<_>>();
1090
1091        let typ = GeometryType::new(Default::default());
1092        let geo_arr = GeometryBuilder::from_nullable_geometries(&geoms, typ)
1093            .unwrap()
1094            .finish();
1095
1096        for null_idx in &null_idxs {
1097            assert!(geo_arr.is_null(*null_idx));
1098        }
1099    }
1100
1101    #[test]
1102    fn test_logical_nulls() {
1103        let geoms = raw::geometry::geoms();
1104        let expected_nulls = NullBuffer::from_iter(geoms.iter().map(|g| g.is_some()));
1105
1106        let typ = GeometryType::new(Default::default());
1107        let geo_arr = GeometryBuilder::from_nullable_geometries(&geoms, typ)
1108            .unwrap()
1109            .finish();
1110
1111        assert_eq!(geo_arr.logical_nulls().unwrap(), expected_nulls);
1112    }
1113
1114    #[test]
1115    fn into_coord_type() {
1116        for prefer_multi in [true, false] {
1117            let geo_arr = crate::test::geometry::array(CoordType::Interleaved, prefer_multi);
1118            let geo_arr2 = geo_arr
1119                .clone()
1120                .into_coord_type(CoordType::Separated)
1121                .into_coord_type(CoordType::Interleaved);
1122
1123            assert_eq!(geo_arr, geo_arr2);
1124        }
1125    }
1126
1127    #[test]
1128    fn partial_eq() {
1129        for prefer_multi in [true, false] {
1130            let arr1 = crate::test::geometry::array(CoordType::Interleaved, prefer_multi);
1131            let arr2 = crate::test::geometry::array(CoordType::Separated, prefer_multi);
1132
1133            assert_eq!(arr1, arr1);
1134            assert_eq!(arr2, arr2);
1135            assert_eq!(arr1, arr2);
1136
1137            assert_ne!(arr1, arr2.slice(0, 2));
1138        }
1139    }
1140
1141    #[test]
1142    fn should_persist_crs() {
1143        let geo_arr = crate::test::geometry::array(CoordType::Interleaved, false);
1144        let crs = Crs::from_authority_code("EPSG:4326".to_string());
1145        let geo_arr = geo_arr.with_metadata(Arc::new(Metadata::new(crs.clone(), None)));
1146
1147        let arrow_arr = geo_arr.to_array_ref();
1148        let field = geo_arr.data_type().to_field("geometry", true);
1149
1150        let geo_arr2: GeometryArray = (arrow_arr.as_ref(), &field).try_into().unwrap();
1151
1152        assert_eq!(geo_arr, geo_arr2);
1153        assert_eq!(geo_arr2.data_type.metadata().crs().clone(), crs);
1154    }
1155
1156    #[test]
1157    fn arrow_round_trip_should_preserve_slicing() {
1158        let geo_arr = crate::test::geometry::array(CoordType::Separated, false);
1159        let geometry_type = geo_arr.extension_type().clone();
1160
1161        let sliced = geo_arr.slice(2, 4);
1162        let arrow_arr = sliced.to_array_ref();
1163        let geo_arr2 = GeometryArray::try_from((arrow_arr.as_ref(), geometry_type)).unwrap();
1164
1165        assert_eq!(sliced, geo_arr2);
1166        assert_eq!(sliced.value(0).unwrap(), geo_arr2.value(0).unwrap());
1167    }
1168
1169    #[test]
1170    fn determine_if_sliced() {
1171        let geo_arr = crate::test::geometry::array(CoordType::Separated, false);
1172        assert!(!geo_arr.is_sliced());
1173
1174        let sliced = geo_arr.slice(2, 4);
1175        assert!(sliced.is_sliced());
1176    }
1177
1178    #[test]
1179    fn test_point_child_via_slicing() {
1180        let point_array = crate::test::point::array(Default::default(), Dimension::XY);
1181        let geometry_array = GeometryArray::from(point_array.clone());
1182
1183        let returned = geometry_array.point_child(Dimension::XY);
1184        assert_eq!(returned, point_array);
1185
1186        // Sliced at beginning
1187        let sliced_geometry_array = geometry_array.slice(0, 2);
1188        let point_child = sliced_geometry_array.point_child(Dimension::XY);
1189        assert_eq!(point_child, point_array.slice(0, 2));
1190
1191        // Sliced in middle
1192        let sliced_geometry_array = geometry_array.slice(1, 2);
1193        let point_child = sliced_geometry_array.point_child(Dimension::XY);
1194        assert_eq!(point_child, point_array.slice(1, 2));
1195
1196        // Sliced at end
1197        let sliced_geometry_array = geometry_array.slice(2, 2);
1198        let point_child = sliced_geometry_array.point_child(Dimension::XY);
1199        assert_eq!(point_child, point_array.slice(2, 2));
1200    }
1201
1202    #[test]
1203    fn test_point_child_mixed_geometries() {
1204        let geoms: Vec<Option<Wkt>> = vec![
1205            // 2D points
1206            Some(wkt! { POINT (30. 10.) }.into()),
1207            Some(wkt! { POINT (40. 20.) }.into()),
1208            // 3D points
1209            Some(wkt! { POINT Z (30. 10. 40.) }.into()),
1210            Some(wkt! { POINT Z (40. 20. 60.) }.into()),
1211            // More 2D points
1212            Some(wkt! { POINT (30. 10.) }.into()),
1213            Some(wkt! { POINT (40. 20.) }.into()),
1214        ];
1215
1216        let mut full_xy_point_arr =
1217            PointBuilder::new(PointType::new(Dimension::XY, Default::default()));
1218        for idx in [0, 1, 4, 5] {
1219            full_xy_point_arr
1220                .push_geometry(geoms[idx].as_ref())
1221                .unwrap();
1222        }
1223        let full_xy_point_arr = full_xy_point_arr.finish();
1224
1225        let geometry_array = GeometryBuilder::from_nullable_geometries(&geoms, Default::default())
1226            .unwrap()
1227            .finish();
1228
1229        let returned = geometry_array.point_child(Dimension::XY);
1230        assert_eq!(returned, full_xy_point_arr);
1231
1232        // Sliced at beginning
1233        let sliced_geometry_array = geometry_array.slice(0, 2);
1234        let point_child = sliced_geometry_array.point_child(Dimension::XY);
1235        assert_eq!(point_child, full_xy_point_arr.slice(0, 2));
1236
1237        // Sliced in middle
1238        let sliced_geometry_array = geometry_array.slice(1, 2);
1239        let point_child = sliced_geometry_array.point_child(Dimension::XY);
1240        assert_eq!(point_child, full_xy_point_arr.slice(1, 1));
1241
1242        // Sliced in middle, removing all 2D points
1243        let sliced_geometry_array = geometry_array.slice(2, 2);
1244        let point_child = sliced_geometry_array.point_child(Dimension::XY);
1245        assert_eq!(point_child, full_xy_point_arr.slice(1, 0));
1246
1247        let sliced_geometry_array = geometry_array.slice(3, 2);
1248        let point_child = sliced_geometry_array.point_child(Dimension::XY);
1249        assert_eq!(point_child, full_xy_point_arr.slice(2, 1));
1250
1251        // Sliced at end
1252        let sliced_geometry_array = geometry_array.slice(4, 2);
1253        let point_child = sliced_geometry_array.point_child(Dimension::XY);
1254        assert_eq!(point_child, full_xy_point_arr.slice(2, 2));
1255    }
1256}