1use std::collections::HashSet;
2use std::sync::Arc;
3
4use arrow_array::cast::AsArray;
5use arrow_array::{Array, ArrayRef, UnionArray};
6use arrow_buffer::ScalarBuffer;
7use arrow_schema::{DataType, UnionMode};
8use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
9use geoarrow_schema::{
10 CoordType, Dimension, GeoArrowType, GeometryCollectionType, LineStringType,
11 MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType,
12};
13
14use crate::GeoArrowArrayAccessor;
15use crate::array::{
16 DimensionIndex, LineStringArray, MultiLineStringArray, MultiPointArray, MultiPolygonArray,
17 PointArray, PolygonArray,
18};
19use crate::builder::{
20 LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder, MultiPolygonBuilder,
21 PointBuilder, PolygonBuilder,
22};
23use crate::capacity::MixedCapacity;
24use crate::scalar::Geometry;
25use crate::trait_::GeoArrowArray;
26
27#[derive(Debug, Clone)]
61pub struct MixedGeometryArray {
62 pub(crate) coord_type: CoordType,
63 pub(crate) dim: Dimension,
64
65 pub(crate) type_ids: ScalarBuffer<i8>,
67
68 pub(crate) offsets: ScalarBuffer<i32>,
70
71 pub(crate) points: PointArray,
73 pub(crate) line_strings: LineStringArray,
74 pub(crate) polygons: PolygonArray,
75 pub(crate) multi_points: MultiPointArray,
76 pub(crate) multi_line_strings: MultiLineStringArray,
77 pub(crate) multi_polygons: MultiPolygonArray,
78
79 slice_offset: usize,
82}
83
84impl MixedGeometryArray {
85 #[allow(clippy::too_many_arguments)]
96 pub fn new(
97 type_ids: ScalarBuffer<i8>,
98 offsets: ScalarBuffer<i32>,
99 points: Option<PointArray>,
100 line_strings: Option<LineStringArray>,
101 polygons: Option<PolygonArray>,
102 multi_points: Option<MultiPointArray>,
103 multi_line_strings: Option<MultiLineStringArray>,
104 multi_polygons: Option<MultiPolygonArray>,
105 ) -> Self {
106 let mut coord_types = HashSet::new();
107 if let Some(points) = &points {
108 coord_types.insert(points.data_type.coord_type());
109 }
110 if let Some(line_strings) = &line_strings {
111 coord_types.insert(line_strings.data_type.coord_type());
112 }
113 if let Some(polygons) = &polygons {
114 coord_types.insert(polygons.data_type.coord_type());
115 }
116 if let Some(multi_points) = &multi_points {
117 coord_types.insert(multi_points.data_type.coord_type());
118 }
119 if let Some(multi_line_strings) = &multi_line_strings {
120 coord_types.insert(multi_line_strings.data_type.coord_type());
121 }
122 if let Some(multi_polygons) = &multi_polygons {
123 coord_types.insert(multi_polygons.data_type.coord_type());
124 }
125 assert!(coord_types.len() <= 1);
126 let coord_type = coord_types
127 .into_iter()
128 .next()
129 .unwrap_or(CoordType::Interleaved);
130
131 let mut dimensions = HashSet::new();
132 if let Some(points) = &points {
133 dimensions.insert(points.data_type.dimension());
134 }
135 if let Some(line_strings) = &line_strings {
136 dimensions.insert(line_strings.data_type.dimension());
137 }
138 if let Some(polygons) = &polygons {
139 dimensions.insert(polygons.data_type.dimension());
140 }
141 if let Some(multi_points) = &multi_points {
142 dimensions.insert(multi_points.data_type.dimension());
143 }
144 if let Some(multi_line_strings) = &multi_line_strings {
145 dimensions.insert(multi_line_strings.data_type.dimension());
146 }
147 if let Some(multi_polygons) = &multi_polygons {
148 dimensions.insert(multi_polygons.data_type.dimension());
149 }
150 assert_eq!(dimensions.len(), 1);
151 let dim = dimensions.into_iter().next().unwrap();
152
153 Self {
154 coord_type,
155 dim,
156 type_ids,
157 offsets,
158 points: points.unwrap_or(
159 PointBuilder::new(
160 PointType::new(dim, Default::default()).with_coord_type(coord_type),
161 )
162 .finish(),
163 ),
164 line_strings: line_strings.unwrap_or(
165 LineStringBuilder::new(
166 LineStringType::new(dim, Default::default()).with_coord_type(coord_type),
167 )
168 .finish(),
169 ),
170 polygons: polygons.unwrap_or(
171 PolygonBuilder::new(
172 PolygonType::new(dim, Default::default()).with_coord_type(coord_type),
173 )
174 .finish(),
175 ),
176 multi_points: multi_points.unwrap_or(
177 MultiPointBuilder::new(
178 MultiPointType::new(dim, Default::default()).with_coord_type(coord_type),
179 )
180 .finish(),
181 ),
182 multi_line_strings: multi_line_strings.unwrap_or(
183 MultiLineStringBuilder::new(
184 MultiLineStringType::new(dim, Default::default()).with_coord_type(coord_type),
185 )
186 .finish(),
187 ),
188 multi_polygons: multi_polygons.unwrap_or(
189 MultiPolygonBuilder::new(
190 MultiPolygonType::new(dim, Default::default()).with_coord_type(coord_type),
191 )
192 .finish(),
193 ),
194 slice_offset: 0,
195 }
196 }
197
198 pub fn buffer_lengths(&self) -> MixedCapacity {
200 MixedCapacity::new(
201 self.points.buffer_lengths(),
202 self.line_strings.buffer_lengths(),
203 self.polygons.buffer_lengths(),
204 self.multi_points.buffer_lengths(),
205 self.multi_line_strings.buffer_lengths(),
206 self.multi_polygons.buffer_lengths(),
207 )
208 }
209
210 pub(crate) fn is_sliced(&self) -> bool {
212 let mut child_lengths = 0;
217 child_lengths += self.points.len();
218 child_lengths += self.line_strings.len();
219 child_lengths += self.polygons.len();
220 child_lengths += self.multi_points.len();
221 child_lengths += self.multi_line_strings.len();
222 child_lengths += self.multi_polygons.len();
223
224 child_lengths > self.len()
225 }
226
227 pub fn has_points(&self) -> bool {
228 if self.points.is_empty() {
229 return false;
230 }
231
232 if self.is_sliced() {
234 for t in self.type_ids.iter() {
235 if *t % 10 == 1 {
236 return true;
237 }
238 }
239
240 return false;
241 }
242
243 true
244 }
245
246 pub fn has_line_strings(&self) -> bool {
247 if self.line_strings.is_empty() {
248 return false;
249 }
250
251 if self.is_sliced() {
253 for t in self.type_ids.iter() {
254 if *t % 10 == 2 {
255 return true;
256 }
257 }
258
259 return false;
260 }
261
262 true
263 }
264
265 pub fn has_polygons(&self) -> bool {
266 if self.polygons.is_empty() {
267 return false;
268 }
269
270 if self.is_sliced() {
272 for t in self.type_ids.iter() {
273 if *t % 10 == 3 {
274 return true;
275 }
276 }
277
278 return false;
279 }
280
281 true
282 }
283
284 pub fn has_multi_points(&self) -> bool {
285 if self.multi_points.is_empty() {
286 return false;
287 }
288
289 if self.is_sliced() {
291 for t in self.type_ids.iter() {
292 if *t % 10 == 4 {
293 return true;
294 }
295 }
296
297 return false;
298 }
299
300 true
301 }
302
303 pub fn has_multi_line_strings(&self) -> bool {
304 if self.multi_line_strings.is_empty() {
305 return false;
306 }
307
308 if self.is_sliced() {
310 for t in self.type_ids.iter() {
311 if *t % 10 == 5 {
312 return true;
313 }
314 }
315
316 return false;
317 }
318
319 true
320 }
321
322 pub fn has_multi_polygons(&self) -> bool {
323 if self.multi_polygons.is_empty() {
324 return false;
325 }
326
327 if self.is_sliced() {
329 for t in self.type_ids.iter() {
330 if *t % 10 == 6 {
331 return true;
332 }
333 }
334
335 return false;
336 }
337
338 true
339 }
340
341 pub fn has_only_points(&self) -> bool {
342 self.has_points()
343 && !self.has_line_strings()
344 && !self.has_polygons()
345 && !self.has_multi_points()
346 && !self.has_multi_line_strings()
347 && !self.has_multi_polygons()
348 }
349
350 pub fn has_only_line_strings(&self) -> bool {
351 !self.has_points()
352 && self.has_line_strings()
353 && !self.has_polygons()
354 && !self.has_multi_points()
355 && !self.has_multi_line_strings()
356 && !self.has_multi_polygons()
357 }
358
359 pub fn has_only_polygons(&self) -> bool {
360 !self.has_points()
361 && !self.has_line_strings()
362 && self.has_polygons()
363 && !self.has_multi_points()
364 && !self.has_multi_line_strings()
365 && !self.has_multi_polygons()
366 }
367
368 pub fn has_only_multi_points(&self) -> bool {
369 !self.has_points()
370 && !self.has_line_strings()
371 && !self.has_polygons()
372 && self.has_multi_points()
373 && !self.has_multi_line_strings()
374 && !self.has_multi_polygons()
375 }
376
377 pub fn has_only_multi_line_strings(&self) -> bool {
378 !self.has_points()
379 && !self.has_line_strings()
380 && !self.has_polygons()
381 && !self.has_multi_points()
382 && self.has_multi_line_strings()
383 && !self.has_multi_polygons()
384 }
385
386 pub fn has_only_multi_polygons(&self) -> bool {
387 !self.has_points()
388 && !self.has_line_strings()
389 && !self.has_polygons()
390 && !self.has_multi_points()
391 && !self.has_multi_line_strings()
392 && self.has_multi_polygons()
393 }
394
395 pub fn num_bytes(&self) -> usize {
397 self.buffer_lengths().num_bytes()
398 }
399
400 #[inline]
410 pub fn slice(&self, offset: usize, length: usize) -> Self {
411 assert!(
412 offset + length <= self.len(),
413 "offset + length may not exceed length of array"
414 );
415 Self {
416 coord_type: self.coord_type,
417 dim: self.dim,
418 type_ids: self.type_ids.slice(offset, length),
419 offsets: self.offsets.slice(offset, length),
420 points: self.points.clone(),
421 line_strings: self.line_strings.clone(),
422 polygons: self.polygons.clone(),
423 multi_points: self.multi_points.clone(),
424 multi_line_strings: self.multi_line_strings.clone(),
425 multi_polygons: self.multi_polygons.clone(),
426 slice_offset: self.slice_offset + offset,
427 }
428 }
429
430 pub fn into_coord_type(self, coord_type: CoordType) -> Self {
431 Self {
432 coord_type,
433 points: self.points.into_coord_type(coord_type),
434 line_strings: self.line_strings.into_coord_type(coord_type),
435 polygons: self.polygons.into_coord_type(coord_type),
436 multi_points: self.multi_points.into_coord_type(coord_type),
437 multi_line_strings: self.multi_line_strings.into_coord_type(coord_type),
438 multi_polygons: self.multi_polygons.into_coord_type(coord_type),
439 ..self
440 }
441 }
442
443 pub fn contained_types(&self) -> HashSet<GeoArrowType> {
444 let mut types = HashSet::new();
445 if self.has_points() {
446 types.insert(self.points.data_type());
447 }
448 if self.has_line_strings() {
449 types.insert(self.line_strings.data_type());
450 }
451 if self.has_polygons() {
452 types.insert(self.polygons.data_type());
453 }
454 if self.has_multi_points() {
455 types.insert(self.multi_points.data_type());
456 }
457 if self.has_multi_line_strings() {
458 types.insert(self.multi_line_strings.data_type());
459 }
460 if self.has_multi_polygons() {
461 types.insert(self.multi_polygons.data_type());
462 }
463
464 types
465 }
466
467 pub(crate) fn storage_type(&self) -> DataType {
468 match GeometryCollectionType::new(self.dim, Default::default())
469 .with_coord_type(self.coord_type)
470 .data_type()
471 {
472 DataType::List(inner_field) => inner_field.data_type().clone(),
473 _ => unreachable!(),
474 }
475 }
476
477 pub(crate) fn into_array_ref(self) -> ArrayRef {
478 Arc::new(UnionArray::from(self))
479 }
480
481 #[inline]
482 fn len(&self) -> usize {
483 self.type_ids.len()
485 }
486
487 pub(crate) unsafe fn value_unchecked(&self, index: usize) -> Geometry {
490 let type_id = self.type_ids[index];
491 let offset = self.offsets[index] as usize;
492
493 let expect_msg = "native geometry value access should never error";
494 match type_id % 10 {
495 1 => Geometry::Point(self.points.value(offset).expect(expect_msg)),
496 2 => Geometry::LineString(self.line_strings.value(offset).expect(expect_msg)),
497 3 => Geometry::Polygon(self.polygons.value(offset).expect(expect_msg)),
498 4 => Geometry::MultiPoint(self.multi_points.value(offset).expect(expect_msg)),
499 5 => {
500 Geometry::MultiLineString(self.multi_line_strings.value(offset).expect(expect_msg))
501 }
502 6 => Geometry::MultiPolygon(self.multi_polygons.value(offset).expect(expect_msg)),
503 7 => {
504 panic!("nested geometry collections not supported in GeoArrow")
505 }
506 _ => unreachable!("unknown type_id {}", type_id),
507 }
508 }
509
510 pub(crate) fn value(&self, index: usize) -> Geometry<'_> {
513 assert!(index <= self.len());
514 unsafe { self.value_unchecked(index) }
515 }
516}
517
518impl From<MixedGeometryArray> for UnionArray {
519 fn from(value: MixedGeometryArray) -> Self {
520 let union_fields = match value.storage_type() {
521 DataType::Union(union_fields, _) => union_fields,
522 _ => unreachable!(),
523 };
524
525 let child_arrays = vec![
526 value.points.into_array_ref(),
527 value.line_strings.into_array_ref(),
528 value.polygons.into_array_ref(),
529 value.multi_points.into_array_ref(),
530 value.multi_line_strings.into_array_ref(),
531 value.multi_polygons.into_array_ref(),
532 ];
533
534 UnionArray::try_new(
535 union_fields,
536 value.type_ids,
537 Some(value.offsets),
538 child_arrays,
539 )
540 .unwrap()
541 }
542}
543
544impl TryFrom<(&UnionArray, Dimension, CoordType)> for MixedGeometryArray {
545 type Error = GeoArrowError;
546
547 fn try_from(
548 (value, dim, coord_type): (&UnionArray, Dimension, CoordType),
549 ) -> GeoArrowResult<Self> {
550 let mut points: Option<PointArray> = None;
551 let mut line_strings: Option<LineStringArray> = None;
552 let mut polygons: Option<PolygonArray> = None;
553 let mut multi_points: Option<MultiPointArray> = None;
554 let mut multi_line_strings: Option<MultiLineStringArray> = None;
555 let mut multi_polygons: Option<MultiPolygonArray> = None;
556
557 match value.data_type() {
558 DataType::Union(fields, mode) => {
559 if !matches!(mode, UnionMode::Dense) {
560 return Err(GeoArrowError::InvalidGeoArrow(
561 "Expected dense union".to_string(),
562 ));
563 }
564
565 for (type_id, _field) in fields.iter() {
566 let found_dimension = Dimension::from_order((type_id / 10) as _)?;
567
568 if dim != found_dimension {
569 return Err(GeoArrowError::InvalidGeoArrow(format!(
570 "expected dimension: {:?}, found child array with dimension {:?} and type_id: {}",
571 dim, found_dimension, type_id
572 )));
573 }
574
575 match type_id % 10 {
576 1 => {
577 points = Some(
578 (
579 value.child(type_id).as_ref(),
580 PointType::new(dim, Default::default())
581 .with_coord_type(coord_type),
582 )
583 .try_into()
584 .unwrap(),
585 );
586 }
587 2 => {
588 line_strings = Some(
589 (
590 value.child(type_id).as_ref(),
591 LineStringType::new(dim, Default::default())
592 .with_coord_type(coord_type),
593 )
594 .try_into()
595 .unwrap(),
596 );
597 }
598 3 => {
599 polygons = Some(
600 (
601 value.child(type_id).as_ref(),
602 PolygonType::new(dim, Default::default())
603 .with_coord_type(coord_type),
604 )
605 .try_into()
606 .unwrap(),
607 );
608 }
609 4 => {
610 multi_points = Some(
611 (
612 value.child(type_id).as_ref(),
613 MultiPointType::new(dim, Default::default())
614 .with_coord_type(coord_type),
615 )
616 .try_into()
617 .unwrap(),
618 );
619 }
620 5 => {
621 multi_line_strings = Some(
622 (
623 value.child(type_id).as_ref(),
624 MultiLineStringType::new(dim, Default::default())
625 .with_coord_type(coord_type),
626 )
627 .try_into()
628 .unwrap(),
629 );
630 }
631 6 => {
632 multi_polygons = Some(
633 (
634 value.child(type_id).as_ref(),
635 MultiPolygonType::new(dim, Default::default())
636 .with_coord_type(coord_type),
637 )
638 .try_into()
639 .unwrap(),
640 );
641 }
642 _ => {
643 return Err(GeoArrowError::InvalidGeoArrow(format!(
644 "Unexpected type_id {} when converting to MixedGeometryArray",
645 type_id
646 )));
647 }
648 }
649 }
650 }
651 _ => {
652 return Err(GeoArrowError::InvalidGeoArrow(
653 "expected union type when converting to MixedGeometryArray".to_string(),
654 ));
655 }
656 };
657
658 let type_ids = value.type_ids().clone();
659 let offsets = value.offsets().unwrap().clone();
661
662 Ok(Self::new(
663 type_ids,
664 offsets,
665 points,
666 line_strings,
667 polygons,
668 multi_points,
669 multi_line_strings,
670 multi_polygons,
671 ))
672 }
673}
674
675impl TryFrom<(&dyn Array, Dimension, CoordType)> for MixedGeometryArray {
676 type Error = GeoArrowError;
677
678 fn try_from(
679 (value, dim, coord_type): (&dyn Array, Dimension, CoordType),
680 ) -> GeoArrowResult<Self> {
681 match value.data_type() {
682 DataType::Union(_, _) => (value.as_union(), dim, coord_type).try_into(),
683 dt => Err(GeoArrowError::InvalidGeoArrow(format!(
684 "Unexpected MixedGeometryArray DataType: {:?}",
685 dt
686 ))),
687 }
688 }
689}
690
691impl PartialEq for MixedGeometryArray {
692 fn eq(&self, other: &Self) -> bool {
693 self.dim == other.dim
694 && self.type_ids == other.type_ids
695 && self.offsets == other.offsets
696 && self.points == other.points
697 && self.line_strings == other.line_strings
698 && self.polygons == other.polygons
699 && self.multi_points == other.multi_points
700 && self.multi_line_strings == other.multi_line_strings
701 && self.multi_polygons == other.multi_polygons
702 && self.slice_offset == other.slice_offset
703 }
704}