geoarrow_array/array/
mixed.rs

1use std::collections::HashSet;
2use std::sync::Arc;
3
4use arrow_array::cast::AsArray;
5use arrow_array::{Array, ArrayRef, UnionArray};
6use arrow_buffer::ScalarBuffer;
7use arrow_schema::{DataType, UnionMode};
8use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
9use geoarrow_schema::{
10    CoordType, Dimension, GeoArrowType, GeometryCollectionType, LineStringType,
11    MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType,
12};
13
14use crate::GeoArrowArrayAccessor;
15use crate::array::{
16    DimensionIndex, LineStringArray, MultiLineStringArray, MultiPointArray, MultiPolygonArray,
17    PointArray, PolygonArray,
18};
19use crate::builder::{
20    LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder, MultiPolygonBuilder,
21    PointBuilder, PolygonBuilder,
22};
23use crate::capacity::MixedCapacity;
24use crate::scalar::Geometry;
25use crate::trait_::GeoArrowArray;
26
/// An array holding geometries of several different types, backed by one child array per
/// geometry type plus per-slot `type_ids` and `offsets` (the layout of a dense Arrow union).
///
/// # Invariants
///
/// - All arrays must have the same dimension
/// - All arrays must have the same coordinate layout (interleaved or separated)
///
/// # Type ids
///
/// The last decimal digit of a type id identifies the geometry type and the tens digit
/// identifies the dimension:
///
/// - 1: Point
/// - 2: LineString
/// - 3: Polygon
/// - 4: MultiPoint
/// - 5: MultiLineString
/// - 6: MultiPolygon
/// - 7: GeometryCollection
/// - 11: Point Z
/// - 12: LineString Z
/// - 13: Polygon Z
/// - 14: MultiPoint Z
/// - 15: MultiLineString Z
/// - 16: MultiPolygon Z
/// - 17: GeometryCollection Z
/// - 21: Point M
/// - 22: LineString M
/// - 23: Polygon M
/// - 24: MultiPoint M
/// - 25: MultiLineString M
/// - 26: MultiPolygon M
/// - 27: GeometryCollection M
/// - 31: Point ZM
/// - 32: LineString ZM
/// - 33: Polygon ZM
/// - 34: MultiPoint ZM
/// - 35: MultiLineString ZM
/// - 36: MultiPolygon ZM
/// - 37: GeometryCollection ZM
#[derive(Debug, Clone)]
pub struct MixedGeometryArray {
    /// Coordinate layout shared by every child array.
    pub(crate) coord_type: CoordType,
    /// Dimension shared by every child array.
    pub(crate) dim: Dimension,

    /// One type id per geometry slot, using the numbering listed on the struct.
    ///
    /// `type_id % 10` selects the child array that holds the geometry (1 = points through
    /// 6 = multi polygons).
    pub(crate) type_ids: ScalarBuffer<i8>,

    /// For each geometry slot, the index of its value inside the child array selected by the
    /// matching entry of `type_ids`.
    ///
    /// Invariant: `offsets.len() == type_ids.len()`
    pub(crate) offsets: ScalarBuffer<i32>,

    // One child array per geometry type. These fields are not optional: `new` substitutes an
    // array produced by a fresh builder for every child that is passed as `None`.
    /// Child array holding the point geometries.
    pub(crate) points: PointArray,
    /// Child array holding the line string geometries.
    pub(crate) line_strings: LineStringArray,
    /// Child array holding the polygon geometries.
    pub(crate) polygons: PolygonArray,
    /// Child array holding the multi point geometries.
    pub(crate) multi_points: MultiPointArray,
    /// Child array holding the multi line string geometries.
    pub(crate) multi_line_strings: MultiLineStringArray,
    /// Child array holding the multi polygon geometries.
    pub(crate) multi_polygons: MultiPolygonArray,

    /// Sum of the offsets of every `slice` call applied to this array.
    ///
    /// We don't need a separate slice_length, because that's the length of the full
    /// MixedGeometryArray (`type_ids` and `offsets` are themselves sliced).
    slice_offset: usize,
}
83
84impl MixedGeometryArray {
85    /// Create a new MixedGeometryArray from parts
86    ///
87    /// # Implementation
88    ///
89    /// This function is `O(1)`.
90    ///
91    /// # Panics
92    ///
93    /// - if the validity is not `None` and its length is different from the number of geometries
94    /// - if the largest geometry offset does not match the number of coordinates
95    #[allow(clippy::too_many_arguments)]
96    pub fn new(
97        type_ids: ScalarBuffer<i8>,
98        offsets: ScalarBuffer<i32>,
99        points: Option<PointArray>,
100        line_strings: Option<LineStringArray>,
101        polygons: Option<PolygonArray>,
102        multi_points: Option<MultiPointArray>,
103        multi_line_strings: Option<MultiLineStringArray>,
104        multi_polygons: Option<MultiPolygonArray>,
105    ) -> Self {
106        let mut coord_types = HashSet::new();
107        if let Some(points) = &points {
108            coord_types.insert(points.data_type.coord_type());
109        }
110        if let Some(line_strings) = &line_strings {
111            coord_types.insert(line_strings.data_type.coord_type());
112        }
113        if let Some(polygons) = &polygons {
114            coord_types.insert(polygons.data_type.coord_type());
115        }
116        if let Some(multi_points) = &multi_points {
117            coord_types.insert(multi_points.data_type.coord_type());
118        }
119        if let Some(multi_line_strings) = &multi_line_strings {
120            coord_types.insert(multi_line_strings.data_type.coord_type());
121        }
122        if let Some(multi_polygons) = &multi_polygons {
123            coord_types.insert(multi_polygons.data_type.coord_type());
124        }
125        assert!(coord_types.len() <= 1);
126        let coord_type = coord_types
127            .into_iter()
128            .next()
129            .unwrap_or(CoordType::Interleaved);
130
131        let mut dimensions = HashSet::new();
132        if let Some(points) = &points {
133            dimensions.insert(points.data_type.dimension());
134        }
135        if let Some(line_strings) = &line_strings {
136            dimensions.insert(line_strings.data_type.dimension());
137        }
138        if let Some(polygons) = &polygons {
139            dimensions.insert(polygons.data_type.dimension());
140        }
141        if let Some(multi_points) = &multi_points {
142            dimensions.insert(multi_points.data_type.dimension());
143        }
144        if let Some(multi_line_strings) = &multi_line_strings {
145            dimensions.insert(multi_line_strings.data_type.dimension());
146        }
147        if let Some(multi_polygons) = &multi_polygons {
148            dimensions.insert(multi_polygons.data_type.dimension());
149        }
150        assert_eq!(dimensions.len(), 1);
151        let dim = dimensions.into_iter().next().unwrap();
152
153        Self {
154            coord_type,
155            dim,
156            type_ids,
157            offsets,
158            points: points.unwrap_or(
159                PointBuilder::new(
160                    PointType::new(dim, Default::default()).with_coord_type(coord_type),
161                )
162                .finish(),
163            ),
164            line_strings: line_strings.unwrap_or(
165                LineStringBuilder::new(
166                    LineStringType::new(dim, Default::default()).with_coord_type(coord_type),
167                )
168                .finish(),
169            ),
170            polygons: polygons.unwrap_or(
171                PolygonBuilder::new(
172                    PolygonType::new(dim, Default::default()).with_coord_type(coord_type),
173                )
174                .finish(),
175            ),
176            multi_points: multi_points.unwrap_or(
177                MultiPointBuilder::new(
178                    MultiPointType::new(dim, Default::default()).with_coord_type(coord_type),
179                )
180                .finish(),
181            ),
182            multi_line_strings: multi_line_strings.unwrap_or(
183                MultiLineStringBuilder::new(
184                    MultiLineStringType::new(dim, Default::default()).with_coord_type(coord_type),
185                )
186                .finish(),
187            ),
188            multi_polygons: multi_polygons.unwrap_or(
189                MultiPolygonBuilder::new(
190                    MultiPolygonType::new(dim, Default::default()).with_coord_type(coord_type),
191                )
192                .finish(),
193            ),
194            slice_offset: 0,
195        }
196    }
197
198    /// The lengths of each buffer contained in this array.
199    pub fn buffer_lengths(&self) -> MixedCapacity {
200        MixedCapacity::new(
201            self.points.buffer_lengths(),
202            self.line_strings.buffer_lengths(),
203            self.polygons.buffer_lengths(),
204            self.multi_points.buffer_lengths(),
205            self.multi_line_strings.buffer_lengths(),
206            self.multi_polygons.buffer_lengths(),
207        )
208    }
209
210    /// Return `true` if this array has been sliced.
211    pub(crate) fn is_sliced(&self) -> bool {
212        // Note this is still not a valid check, because it could've been sliced with start 0 but
213        // length less than the full length.
214        // self.slice_offset > 0 || self.slice_length
215
216        let mut child_lengths = 0;
217        child_lengths += self.points.len();
218        child_lengths += self.line_strings.len();
219        child_lengths += self.polygons.len();
220        child_lengths += self.multi_points.len();
221        child_lengths += self.multi_line_strings.len();
222        child_lengths += self.multi_polygons.len();
223
224        child_lengths > self.len()
225    }
226
227    pub fn has_points(&self) -> bool {
228        if self.points.is_empty() {
229            return false;
230        }
231
232        // If the array has been sliced, check a point type id still exists
233        if self.is_sliced() {
234            for t in self.type_ids.iter() {
235                if *t % 10 == 1 {
236                    return true;
237                }
238            }
239
240            return false;
241        }
242
243        true
244    }
245
246    pub fn has_line_strings(&self) -> bool {
247        if self.line_strings.is_empty() {
248            return false;
249        }
250
251        // If the array has been sliced, check a point type id still exists
252        if self.is_sliced() {
253            for t in self.type_ids.iter() {
254                if *t % 10 == 2 {
255                    return true;
256                }
257            }
258
259            return false;
260        }
261
262        true
263    }
264
265    pub fn has_polygons(&self) -> bool {
266        if self.polygons.is_empty() {
267            return false;
268        }
269
270        // If the array has been sliced, check a point type id still exists
271        if self.is_sliced() {
272            for t in self.type_ids.iter() {
273                if *t % 10 == 3 {
274                    return true;
275                }
276            }
277
278            return false;
279        }
280
281        true
282    }
283
284    pub fn has_multi_points(&self) -> bool {
285        if self.multi_points.is_empty() {
286            return false;
287        }
288
289        // If the array has been sliced, check a point type id still exists
290        if self.is_sliced() {
291            for t in self.type_ids.iter() {
292                if *t % 10 == 4 {
293                    return true;
294                }
295            }
296
297            return false;
298        }
299
300        true
301    }
302
303    pub fn has_multi_line_strings(&self) -> bool {
304        if self.multi_line_strings.is_empty() {
305            return false;
306        }
307
308        // If the array has been sliced, check a point type id still exists
309        if self.is_sliced() {
310            for t in self.type_ids.iter() {
311                if *t % 10 == 5 {
312                    return true;
313                }
314            }
315
316            return false;
317        }
318
319        true
320    }
321
322    pub fn has_multi_polygons(&self) -> bool {
323        if self.multi_polygons.is_empty() {
324            return false;
325        }
326
327        // If the array has been sliced, check a point type id still exists
328        if self.is_sliced() {
329            for t in self.type_ids.iter() {
330                if *t % 10 == 6 {
331                    return true;
332                }
333            }
334
335            return false;
336        }
337
338        true
339    }
340
341    pub fn has_only_points(&self) -> bool {
342        self.has_points()
343            && !self.has_line_strings()
344            && !self.has_polygons()
345            && !self.has_multi_points()
346            && !self.has_multi_line_strings()
347            && !self.has_multi_polygons()
348    }
349
350    pub fn has_only_line_strings(&self) -> bool {
351        !self.has_points()
352            && self.has_line_strings()
353            && !self.has_polygons()
354            && !self.has_multi_points()
355            && !self.has_multi_line_strings()
356            && !self.has_multi_polygons()
357    }
358
359    pub fn has_only_polygons(&self) -> bool {
360        !self.has_points()
361            && !self.has_line_strings()
362            && self.has_polygons()
363            && !self.has_multi_points()
364            && !self.has_multi_line_strings()
365            && !self.has_multi_polygons()
366    }
367
368    pub fn has_only_multi_points(&self) -> bool {
369        !self.has_points()
370            && !self.has_line_strings()
371            && !self.has_polygons()
372            && self.has_multi_points()
373            && !self.has_multi_line_strings()
374            && !self.has_multi_polygons()
375    }
376
377    pub fn has_only_multi_line_strings(&self) -> bool {
378        !self.has_points()
379            && !self.has_line_strings()
380            && !self.has_polygons()
381            && !self.has_multi_points()
382            && self.has_multi_line_strings()
383            && !self.has_multi_polygons()
384    }
385
386    pub fn has_only_multi_polygons(&self) -> bool {
387        !self.has_points()
388            && !self.has_line_strings()
389            && !self.has_polygons()
390            && !self.has_multi_points()
391            && !self.has_multi_line_strings()
392            && self.has_multi_polygons()
393    }
394
395    /// The number of bytes occupied by this array.
396    pub fn num_bytes(&self) -> usize {
397        self.buffer_lengths().num_bytes()
398    }
399
400    /// Slice this [`MixedGeometryArray`].
401    ///
402    /// # Implementation
403    ///
404    /// This operation is `O(F)` where `F` is the number of fields.
405    ///
406    /// # Panic
407    ///
408    /// This function panics iff `offset + length > self.len()`.
409    #[inline]
410    pub fn slice(&self, offset: usize, length: usize) -> Self {
411        assert!(
412            offset + length <= self.len(),
413            "offset + length may not exceed length of array"
414        );
415        Self {
416            coord_type: self.coord_type,
417            dim: self.dim,
418            type_ids: self.type_ids.slice(offset, length),
419            offsets: self.offsets.slice(offset, length),
420            points: self.points.clone(),
421            line_strings: self.line_strings.clone(),
422            polygons: self.polygons.clone(),
423            multi_points: self.multi_points.clone(),
424            multi_line_strings: self.multi_line_strings.clone(),
425            multi_polygons: self.multi_polygons.clone(),
426            slice_offset: self.slice_offset + offset,
427        }
428    }
429
430    pub fn into_coord_type(self, coord_type: CoordType) -> Self {
431        Self {
432            coord_type,
433            points: self.points.into_coord_type(coord_type),
434            line_strings: self.line_strings.into_coord_type(coord_type),
435            polygons: self.polygons.into_coord_type(coord_type),
436            multi_points: self.multi_points.into_coord_type(coord_type),
437            multi_line_strings: self.multi_line_strings.into_coord_type(coord_type),
438            multi_polygons: self.multi_polygons.into_coord_type(coord_type),
439            ..self
440        }
441    }
442
443    pub fn contained_types(&self) -> HashSet<GeoArrowType> {
444        let mut types = HashSet::new();
445        if self.has_points() {
446            types.insert(self.points.data_type());
447        }
448        if self.has_line_strings() {
449            types.insert(self.line_strings.data_type());
450        }
451        if self.has_polygons() {
452            types.insert(self.polygons.data_type());
453        }
454        if self.has_multi_points() {
455            types.insert(self.multi_points.data_type());
456        }
457        if self.has_multi_line_strings() {
458            types.insert(self.multi_line_strings.data_type());
459        }
460        if self.has_multi_polygons() {
461            types.insert(self.multi_polygons.data_type());
462        }
463
464        types
465    }
466
467    pub(crate) fn storage_type(&self) -> DataType {
468        match GeometryCollectionType::new(self.dim, Default::default())
469            .with_coord_type(self.coord_type)
470            .data_type()
471        {
472            DataType::List(inner_field) => inner_field.data_type().clone(),
473            _ => unreachable!(),
474        }
475    }
476
477    pub(crate) fn into_array_ref(self) -> ArrayRef {
478        Arc::new(UnionArray::from(self))
479    }
480
481    #[inline]
482    fn len(&self) -> usize {
483        // Note that `type_ids` is sliced as usual, and thus always has the correct length.
484        self.type_ids.len()
485    }
486
487    // Note: this is copied from ArrayAccessor because MixedGeometryArray doesn't implement
488    // GeoArrowArray
489    pub(crate) unsafe fn value_unchecked(&self, index: usize) -> Geometry {
490        let type_id = self.type_ids[index];
491        let offset = self.offsets[index] as usize;
492
493        let expect_msg = "native geometry value access should never error";
494        match type_id % 10 {
495            1 => Geometry::Point(self.points.value(offset).expect(expect_msg)),
496            2 => Geometry::LineString(self.line_strings.value(offset).expect(expect_msg)),
497            3 => Geometry::Polygon(self.polygons.value(offset).expect(expect_msg)),
498            4 => Geometry::MultiPoint(self.multi_points.value(offset).expect(expect_msg)),
499            5 => {
500                Geometry::MultiLineString(self.multi_line_strings.value(offset).expect(expect_msg))
501            }
502            6 => Geometry::MultiPolygon(self.multi_polygons.value(offset).expect(expect_msg)),
503            7 => {
504                panic!("nested geometry collections not supported in GeoArrow")
505            }
506            _ => unreachable!("unknown type_id {}", type_id),
507        }
508    }
509
510    // Note: this is copied from ArrayAccessor because MixedGeometryArray doesn't implement
511    // GeoArrowArray
512    pub(crate) fn value(&self, index: usize) -> Geometry<'_> {
513        assert!(index <= self.len());
514        unsafe { self.value_unchecked(index) }
515    }
516}
517
518impl From<MixedGeometryArray> for UnionArray {
519    fn from(value: MixedGeometryArray) -> Self {
520        let union_fields = match value.storage_type() {
521            DataType::Union(union_fields, _) => union_fields,
522            _ => unreachable!(),
523        };
524
525        let child_arrays = vec![
526            value.points.into_array_ref(),
527            value.line_strings.into_array_ref(),
528            value.polygons.into_array_ref(),
529            value.multi_points.into_array_ref(),
530            value.multi_line_strings.into_array_ref(),
531            value.multi_polygons.into_array_ref(),
532        ];
533
534        UnionArray::try_new(
535            union_fields,
536            value.type_ids,
537            Some(value.offsets),
538            child_arrays,
539        )
540        .unwrap()
541    }
542}
543
544impl TryFrom<(&UnionArray, Dimension, CoordType)> for MixedGeometryArray {
545    type Error = GeoArrowError;
546
547    fn try_from(
548        (value, dim, coord_type): (&UnionArray, Dimension, CoordType),
549    ) -> GeoArrowResult<Self> {
550        let mut points: Option<PointArray> = None;
551        let mut line_strings: Option<LineStringArray> = None;
552        let mut polygons: Option<PolygonArray> = None;
553        let mut multi_points: Option<MultiPointArray> = None;
554        let mut multi_line_strings: Option<MultiLineStringArray> = None;
555        let mut multi_polygons: Option<MultiPolygonArray> = None;
556
557        match value.data_type() {
558            DataType::Union(fields, mode) => {
559                if !matches!(mode, UnionMode::Dense) {
560                    return Err(GeoArrowError::InvalidGeoArrow(
561                        "Expected dense union".to_string(),
562                    ));
563                }
564
565                for (type_id, _field) in fields.iter() {
566                    let found_dimension = Dimension::from_order((type_id / 10) as _)?;
567
568                    if dim != found_dimension {
569                        return Err(GeoArrowError::InvalidGeoArrow(format!(
570                            "expected dimension: {:?}, found child array with dimension {:?} and type_id: {}",
571                            dim, found_dimension, type_id
572                        )));
573                    }
574
575                    match type_id % 10 {
576                        1 => {
577                            points = Some(
578                                (
579                                    value.child(type_id).as_ref(),
580                                    PointType::new(dim, Default::default())
581                                        .with_coord_type(coord_type),
582                                )
583                                    .try_into()
584                                    .unwrap(),
585                            );
586                        }
587                        2 => {
588                            line_strings = Some(
589                                (
590                                    value.child(type_id).as_ref(),
591                                    LineStringType::new(dim, Default::default())
592                                        .with_coord_type(coord_type),
593                                )
594                                    .try_into()
595                                    .unwrap(),
596                            );
597                        }
598                        3 => {
599                            polygons = Some(
600                                (
601                                    value.child(type_id).as_ref(),
602                                    PolygonType::new(dim, Default::default())
603                                        .with_coord_type(coord_type),
604                                )
605                                    .try_into()
606                                    .unwrap(),
607                            );
608                        }
609                        4 => {
610                            multi_points = Some(
611                                (
612                                    value.child(type_id).as_ref(),
613                                    MultiPointType::new(dim, Default::default())
614                                        .with_coord_type(coord_type),
615                                )
616                                    .try_into()
617                                    .unwrap(),
618                            );
619                        }
620                        5 => {
621                            multi_line_strings = Some(
622                                (
623                                    value.child(type_id).as_ref(),
624                                    MultiLineStringType::new(dim, Default::default())
625                                        .with_coord_type(coord_type),
626                                )
627                                    .try_into()
628                                    .unwrap(),
629                            );
630                        }
631                        6 => {
632                            multi_polygons = Some(
633                                (
634                                    value.child(type_id).as_ref(),
635                                    MultiPolygonType::new(dim, Default::default())
636                                        .with_coord_type(coord_type),
637                                )
638                                    .try_into()
639                                    .unwrap(),
640                            );
641                        }
642                        _ => {
643                            return Err(GeoArrowError::InvalidGeoArrow(format!(
644                                "Unexpected type_id {} when converting to MixedGeometryArray",
645                                type_id
646                            )));
647                        }
648                    }
649                }
650            }
651            _ => {
652                return Err(GeoArrowError::InvalidGeoArrow(
653                    "expected union type when converting to MixedGeometryArray".to_string(),
654                ));
655            }
656        };
657
658        let type_ids = value.type_ids().clone();
659        // This is after checking for dense union
660        let offsets = value.offsets().unwrap().clone();
661
662        Ok(Self::new(
663            type_ids,
664            offsets,
665            points,
666            line_strings,
667            polygons,
668            multi_points,
669            multi_line_strings,
670            multi_polygons,
671        ))
672    }
673}
674
675impl TryFrom<(&dyn Array, Dimension, CoordType)> for MixedGeometryArray {
676    type Error = GeoArrowError;
677
678    fn try_from(
679        (value, dim, coord_type): (&dyn Array, Dimension, CoordType),
680    ) -> GeoArrowResult<Self> {
681        match value.data_type() {
682            DataType::Union(_, _) => (value.as_union(), dim, coord_type).try_into(),
683            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
684                "Unexpected MixedGeometryArray DataType: {:?}",
685                dt
686            ))),
687        }
688    }
689}
690
691impl PartialEq for MixedGeometryArray {
692    fn eq(&self, other: &Self) -> bool {
693        self.dim == other.dim
694            && self.type_ids == other.type_ids
695            && self.offsets == other.offsets
696            && self.points == other.points
697            && self.line_strings == other.line_strings
698            && self.polygons == other.polygons
699            && self.multi_points == other.multi_points
700            && self.multi_line_strings == other.multi_line_strings
701            && self.multi_polygons == other.multi_polygons
702            && self.slice_offset == other.slice_offset
703    }
704}