1use std::collections::HashSet;
2use std::sync::Arc;
3
4use arrow_schema::extension::ExtensionType;
5use arrow_schema::{ArrowError, DataType, Field, UnionFields, UnionMode};
6
7use crate::error::GeoArrowError;
8use crate::metadata::Metadata;
9use crate::{CoordType, Dimension};
10
11macro_rules! define_basic_type {
12 (
13 $(#[$($attrss:meta)*])*
14 $struct_name:ident
15 ) => {
16 $(#[$($attrss)*])*
17 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
18 pub struct $struct_name {
19 coord_type: CoordType,
20 dim: Dimension,
21 metadata: Arc<Metadata>,
22 }
23
24 impl $struct_name {
25 pub fn new(dim: Dimension, metadata: Arc<Metadata>) -> Self {
27 Self {
28 coord_type: Default::default(),
29 dim,
30 metadata,
31 }
32 }
33
34 pub fn with_coord_type(self, coord_type: CoordType) -> Self {
36 Self { coord_type, ..self }
37 }
38
39 pub fn with_dimension(self, dim: Dimension) -> Self {
41 Self { dim, ..self }
42 }
43
44 pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
46 Self { metadata, ..self }
47 }
48
49 pub fn coord_type(&self) -> CoordType {
51 self.coord_type
52 }
53
54 pub fn dimension(&self) -> Dimension {
56 self.dim
57 }
58
59 pub fn metadata(&self) -> &Arc<Metadata> {
61 &self.metadata
62 }
63
64 pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
66 Field::new(name, self.data_type(), nullable).with_extension_type(self.clone())
67 }
68
69 pub fn into_inner(self) -> (CoordType, Dimension, Arc<Metadata>) {
71 (self.coord_type, self.dim, self.metadata)
72 }
73 }
74 };
75}
76
77define_basic_type!(
78 PointType
83);
84define_basic_type!(
85 LineStringType
90);
91define_basic_type!(
92 PolygonType
97);
98define_basic_type!(
99 MultiPointType
104);
105define_basic_type!(
106 MultiLineStringType
111);
112define_basic_type!(
113 MultiPolygonType
118);
119define_basic_type!(
120 GeometryCollectionType
125);
126
127impl PointType {
128 pub fn data_type(&self) -> DataType {
140 coord_type_to_data_type(self.coord_type, self.dim)
141 }
142}
143
144impl ExtensionType for PointType {
145 const NAME: &'static str = "geoarrow.point";
146
147 type Metadata = Arc<Metadata>;
148
149 fn metadata(&self) -> &Self::Metadata {
150 self.metadata()
151 }
152
153 fn serialize_metadata(&self) -> Option<String> {
154 self.metadata.serialize()
155 }
156
157 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
158 Ok(Arc::new(Metadata::deserialize(metadata)?))
159 }
160
161 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
162 let (coord_type, dim) = parse_point(data_type)?;
163 if coord_type != self.coord_type {
164 return Err(ArrowError::SchemaError(format!(
165 "Expected coordinate type {:?}, but got {:?}",
166 self.coord_type, coord_type
167 )));
168 }
169 if dim != self.dim {
170 return Err(ArrowError::SchemaError(format!(
171 "Expected dimension {:?}, but got {:?}",
172 self.dim, dim
173 )));
174 }
175 Ok(())
176 }
177
178 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
179 let (coord_type, dim) = parse_point(data_type)?;
180 Ok(Self {
181 coord_type,
182 dim,
183 metadata,
184 })
185 }
186}
187
188fn parse_point(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
189 match data_type {
190 DataType::FixedSizeList(inner_field, list_size) => {
191 let dim_parsed_from_field = Dimension::from_interleaved_field(inner_field)?;
192 if dim_parsed_from_field.size() != *list_size as usize {
193 Err(GeoArrowError::InvalidGeoArrow(format!(
194 "Field metadata suggests list of size {}, but list size is {}",
195 dim_parsed_from_field.size(),
196 list_size
197 ))
198 .into())
199 } else {
200 Ok((CoordType::Interleaved, dim_parsed_from_field))
201 }
202 }
203 DataType::Struct(struct_fields) => Ok((
204 CoordType::Separated,
205 Dimension::from_separated_field(struct_fields)?,
206 )),
207 dt => Err(ArrowError::SchemaError(format!(
208 "Unexpected data type {dt}"
209 ))),
210 }
211}
212
213impl LineStringType {
214 pub fn data_type(&self) -> DataType {
232 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
233 let vertices_field = Field::new("vertices", coords_type, false).into();
234 DataType::List(vertices_field)
235 }
236}
237
238impl ExtensionType for LineStringType {
239 const NAME: &'static str = "geoarrow.linestring";
240
241 type Metadata = Arc<Metadata>;
242
243 fn metadata(&self) -> &Self::Metadata {
244 self.metadata()
245 }
246
247 fn serialize_metadata(&self) -> Option<String> {
248 self.metadata.serialize()
249 }
250
251 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
252 Ok(Arc::new(Metadata::deserialize(metadata)?))
253 }
254
255 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
256 let (coord_type, dim) = parse_linestring(data_type)?;
257 if coord_type != self.coord_type {
258 return Err(ArrowError::SchemaError(format!(
259 "Expected coordinate type {:?}, but got {:?}",
260 self.coord_type, coord_type
261 )));
262 }
263 if dim != self.dim {
264 return Err(ArrowError::SchemaError(format!(
265 "Expected dimension {:?}, but got {:?}",
266 self.dim, dim
267 )));
268 }
269 Ok(())
270 }
271
272 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
273 let (coord_type, dim) = parse_linestring(data_type)?;
274 Ok(Self {
275 coord_type,
276 dim,
277 metadata,
278 })
279 }
280}
281
282fn parse_linestring(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
283 match data_type {
284 DataType::List(inner_field) | DataType::LargeList(inner_field) => {
285 parse_point(inner_field.data_type())
286 }
287 dt => Err(ArrowError::SchemaError(format!(
288 "Unexpected data type {dt}"
289 ))),
290 }
291}
292
293impl PolygonType {
294 pub fn data_type(&self) -> DataType {
316 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
317 let vertices_field = Field::new("vertices", coords_type, false);
318 let rings_field = Field::new_list("rings", vertices_field, false).into();
319 DataType::List(rings_field)
320 }
321}
322
323impl ExtensionType for PolygonType {
324 const NAME: &'static str = "geoarrow.polygon";
325
326 type Metadata = Arc<Metadata>;
327
328 fn metadata(&self) -> &Self::Metadata {
329 self.metadata()
330 }
331
332 fn serialize_metadata(&self) -> Option<String> {
333 self.metadata.serialize()
334 }
335
336 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
337 Ok(Arc::new(Metadata::deserialize(metadata)?))
338 }
339
340 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
341 let (coord_type, dim) = parse_polygon(data_type)?;
342 if coord_type != self.coord_type {
343 return Err(ArrowError::SchemaError(format!(
344 "Expected coordinate type {:?}, but got {:?}",
345 self.coord_type, coord_type
346 )));
347 }
348 if dim != self.dim {
349 return Err(ArrowError::SchemaError(format!(
350 "Expected dimension {:?}, but got {:?}",
351 self.dim, dim
352 )));
353 }
354 Ok(())
355 }
356
357 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
358 let (coord_type, dim) = parse_polygon(data_type)?;
359 Ok(Self {
360 coord_type,
361 dim,
362 metadata,
363 })
364 }
365}
366
367fn parse_polygon(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
368 match data_type {
369 DataType::List(inner1) => match inner1.data_type() {
370 DataType::List(inner2) => parse_point(inner2.data_type()),
371 dt => Err(ArrowError::SchemaError(format!(
372 "Unexpected inner polygon data type: {dt}"
373 ))),
374 },
375 DataType::LargeList(inner1) => match inner1.data_type() {
376 DataType::LargeList(inner2) => parse_point(inner2.data_type()),
377 dt => Err(ArrowError::SchemaError(format!(
378 "Unexpected inner polygon data type: {dt}"
379 ))),
380 },
381 dt => Err(ArrowError::SchemaError(format!(
382 "Unexpected root data type parsing polygon {dt}"
383 ))),
384 }
385}
386
387impl MultiPointType {
388 pub fn data_type(&self) -> DataType {
409 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
410 let vertices_field = Field::new("points", coords_type, false).into();
411 DataType::List(vertices_field)
412 }
413}
414
415impl ExtensionType for MultiPointType {
416 const NAME: &'static str = "geoarrow.multipoint";
417
418 type Metadata = Arc<Metadata>;
419
420 fn metadata(&self) -> &Self::Metadata {
421 self.metadata()
422 }
423
424 fn serialize_metadata(&self) -> Option<String> {
425 self.metadata.serialize()
426 }
427
428 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
429 Ok(Arc::new(Metadata::deserialize(metadata)?))
430 }
431
432 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
433 let (coord_type, dim) = parse_multipoint(data_type)?;
434 if coord_type != self.coord_type {
435 return Err(ArrowError::SchemaError(format!(
436 "Expected coordinate type {:?}, but got {:?}",
437 self.coord_type, coord_type
438 )));
439 }
440 if dim != self.dim {
441 return Err(ArrowError::SchemaError(format!(
442 "Expected dimension {:?}, but got {:?}",
443 self.dim, dim
444 )));
445 }
446 Ok(())
447 }
448
449 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
450 let (coord_type, dim) = parse_multipoint(data_type)?;
451 Ok(Self {
452 coord_type,
453 dim,
454 metadata,
455 })
456 }
457}
458
459fn parse_multipoint(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
460 match data_type {
461 DataType::List(inner_field) => parse_point(inner_field.data_type()),
462 DataType::LargeList(inner_field) => parse_point(inner_field.data_type()),
463 dt => Err(ArrowError::SchemaError(format!(
464 "Unexpected data type {dt}"
465 ))),
466 }
467}
468
469impl MultiLineStringType {
470 pub fn data_type(&self) -> DataType {
493 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
494 let vertices_field = Field::new("vertices", coords_type, false);
495 let linestrings_field = Field::new_list("linestrings", vertices_field, false).into();
496 DataType::List(linestrings_field)
497 }
498}
499
500impl ExtensionType for MultiLineStringType {
501 const NAME: &'static str = "geoarrow.multilinestring";
502
503 type Metadata = Arc<Metadata>;
504
505 fn metadata(&self) -> &Self::Metadata {
506 self.metadata()
507 }
508
509 fn serialize_metadata(&self) -> Option<String> {
510 self.metadata.serialize()
511 }
512
513 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
514 Ok(Arc::new(Metadata::deserialize(metadata)?))
515 }
516
517 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
518 let (coord_type, dim) = parse_multilinestring(data_type)?;
519 if coord_type != self.coord_type {
520 return Err(ArrowError::SchemaError(format!(
521 "Expected coordinate type {:?}, but got {:?}",
522 self.coord_type, coord_type
523 )));
524 }
525 if dim != self.dim {
526 return Err(ArrowError::SchemaError(format!(
527 "Expected dimension {:?}, but got {:?}",
528 self.dim, dim
529 )));
530 }
531 Ok(())
532 }
533
534 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
535 let (coord_type, dim) = parse_multilinestring(data_type)?;
536 Ok(Self {
537 coord_type,
538 dim,
539 metadata,
540 })
541 }
542}
543
544fn parse_multilinestring(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
545 match data_type {
546 DataType::List(inner1) => match inner1.data_type() {
547 DataType::List(inner2) => parse_point(inner2.data_type()),
548 dt => Err(ArrowError::SchemaError(format!(
549 "Unexpected inner multilinestring data type: {dt}"
550 ))),
551 },
552 DataType::LargeList(inner1) => match inner1.data_type() {
553 DataType::LargeList(inner2) => parse_point(inner2.data_type()),
554 dt => Err(ArrowError::SchemaError(format!(
555 "Unexpected inner multilinestring data type: {dt}"
556 ))),
557 },
558 dt => Err(ArrowError::SchemaError(format!(
559 "Unexpected data type parsing multilinestring: {dt}"
560 ))),
561 }
562}
563
564impl MultiPolygonType {
565 pub fn data_type(&self) -> DataType {
588 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
589 let vertices_field = Field::new("vertices", coords_type, false);
590 let rings_field = Field::new_list("rings", vertices_field, false);
591 let polygons_field = Field::new_list("polygons", rings_field, false).into();
592 DataType::List(polygons_field)
593 }
594}
595
596impl ExtensionType for MultiPolygonType {
597 const NAME: &'static str = "geoarrow.multipolygon";
598
599 type Metadata = Arc<Metadata>;
600
601 fn metadata(&self) -> &Self::Metadata {
602 self.metadata()
603 }
604
605 fn serialize_metadata(&self) -> Option<String> {
606 self.metadata.serialize()
607 }
608
609 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
610 Ok(Arc::new(Metadata::deserialize(metadata)?))
611 }
612
613 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
614 let (coord_type, dim) = parse_multipolygon(data_type)?;
615 if coord_type != self.coord_type {
616 return Err(ArrowError::SchemaError(format!(
617 "Expected coordinate type {:?}, but got {:?}",
618 self.coord_type, coord_type
619 )));
620 }
621 if dim != self.dim {
622 return Err(ArrowError::SchemaError(format!(
623 "Expected dimension {:?}, but got {:?}",
624 self.dim, dim
625 )));
626 }
627 Ok(())
628 }
629
630 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
631 let (coord_type, dim) = parse_multipolygon(data_type)?;
632 Ok(Self {
633 coord_type,
634 dim,
635 metadata,
636 })
637 }
638}
639
640fn parse_multipolygon(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
641 match data_type {
642 DataType::List(inner1) => match inner1.data_type() {
643 DataType::List(inner2) => match inner2.data_type() {
644 DataType::List(inner3) => parse_point(inner3.data_type()),
645 dt => Err(ArrowError::SchemaError(format!(
646 "Unexpected inner2 multipolygon data type: {dt}"
647 ))),
648 },
649 dt => Err(ArrowError::SchemaError(format!(
650 "Unexpected inner1 multipolygon data type: {dt}"
651 ))),
652 },
653 DataType::LargeList(inner1) => match inner1.data_type() {
654 DataType::LargeList(inner2) => match inner2.data_type() {
655 DataType::LargeList(inner3) => parse_point(inner3.data_type()),
656 dt => Err(ArrowError::SchemaError(format!(
657 "Unexpected inner2 multipolygon data type: {dt}"
658 ))),
659 },
660 dt => Err(ArrowError::SchemaError(format!(
661 "Unexpected inner1 multipolygon data type: {dt}"
662 ))),
663 },
664 dt => Err(ArrowError::SchemaError(format!(
665 "Unexpected data type {dt}"
666 ))),
667 }
668}
669
670impl GeometryCollectionType {
671 pub fn data_type(&self) -> DataType {
729 let geometries_field = Field::new(
730 "geometries",
731 mixed_data_type(self.coord_type, self.dim),
732 false,
733 )
734 .into();
735 DataType::List(geometries_field)
736 }
737}
738
739fn mixed_data_type(coord_type: CoordType, dim: Dimension) -> DataType {
740 let mut fields = vec![];
741 let mut type_ids = vec![];
742
743 match dim {
744 Dimension::XY => type_ids.extend([1, 2, 3, 4, 5, 6]),
745 Dimension::XYZ => type_ids.extend([11, 12, 13, 14, 15, 16]),
746 Dimension::XYM => type_ids.extend([21, 22, 23, 24, 25, 26]),
747 Dimension::XYZM => type_ids.extend([31, 32, 33, 34, 35, 36]),
748 }
749
750 macro_rules! push_field {
753 ($field_name:literal, $geom_type:ident) => {{
754 fields.push(Field::new(
755 $field_name,
756 $geom_type {
757 coord_type,
758 dim,
759 metadata: Metadata::default().into(),
760 }
761 .data_type(),
762 true,
763 ));
764 }};
765 }
766
767 match dim {
768 Dimension::XY => {
769 push_field!("Point", PointType);
770 push_field!("LineString", LineStringType);
771 push_field!("Polygon", PolygonType);
772 push_field!("MultiPoint", MultiPointType);
773 push_field!("MultiLineString", MultiLineStringType);
774 push_field!("MultiPolygon", MultiPolygonType);
775 }
776 Dimension::XYZ => {
777 push_field!("Point Z", PointType);
778 push_field!("LineString Z", LineStringType);
779 push_field!("Polygon Z", PolygonType);
780 push_field!("MultiPoint Z", MultiPointType);
781 push_field!("MultiLineString Z", MultiLineStringType);
782 push_field!("MultiPolygon Z", MultiPolygonType);
783 }
784 Dimension::XYM => {
785 push_field!("Point M", PointType);
786 push_field!("LineString M", LineStringType);
787 push_field!("Polygon M", PolygonType);
788 push_field!("MultiPoint M", MultiPointType);
789 push_field!("MultiLineString M", MultiLineStringType);
790 push_field!("MultiPolygon M", MultiPolygonType);
791 }
792 Dimension::XYZM => {
793 push_field!("Point ZM", PointType);
794 push_field!("LineString ZM", LineStringType);
795 push_field!("Polygon ZM", PolygonType);
796 push_field!("MultiPoint ZM", MultiPointType);
797 push_field!("MultiLineString ZM", MultiLineStringType);
798 push_field!("MultiPolygon ZM", MultiPolygonType);
799 }
800 }
801
802 let union_fields = UnionFields::new(type_ids, fields);
803 DataType::Union(union_fields, UnionMode::Dense)
804}
805
806impl ExtensionType for GeometryCollectionType {
807 const NAME: &'static str = "geoarrow.geometrycollection";
808
809 type Metadata = Arc<Metadata>;
810
811 fn metadata(&self) -> &Self::Metadata {
812 self.metadata()
813 }
814
815 fn serialize_metadata(&self) -> Option<String> {
816 self.metadata.serialize()
817 }
818
819 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
820 Ok(Arc::new(Metadata::deserialize(metadata)?))
821 }
822
823 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
824 let (coord_type, dim) = parse_geometry_collection(data_type)?;
825 if coord_type != self.coord_type {
826 return Err(ArrowError::SchemaError(format!(
827 "Expected coordinate type {:?}, but got {:?}",
828 self.coord_type, coord_type
829 )));
830 }
831 if dim != self.dim {
832 return Err(ArrowError::SchemaError(format!(
833 "Expected dimension {:?}, but got {:?}",
834 self.dim, dim
835 )));
836 }
837 Ok(())
838 }
839
840 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
841 let (coord_type, dim) = parse_geometry_collection(data_type)?;
842 Ok(Self {
843 coord_type,
844 dim,
845 metadata,
846 })
847 }
848}
849
850fn parse_mixed(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
851 match data_type {
852 DataType::Union(fields, _) => {
853 let mut coord_types: HashSet<CoordType> = HashSet::new();
854 let mut dimensions: HashSet<Dimension> = HashSet::new();
855
856 fields.iter().try_for_each(|(type_id, field)| {
858 macro_rules! impl_type_id {
859 ($expected_dim:path, $parse_fn:ident) => {{
860 let (ct, dim) = $parse_fn(field.data_type())?;
861 coord_types.insert(ct);
862 assert!(matches!(dim, $expected_dim));
863 dimensions.insert(dim);
864 }};
865 }
866
867 match type_id {
868 1 => impl_type_id!(Dimension::XY, parse_point),
869 2 => impl_type_id!(Dimension::XY, parse_linestring),
870 3 => impl_type_id!(Dimension::XY, parse_polygon),
871 4 => impl_type_id!(Dimension::XY, parse_multipoint),
872 5 => impl_type_id!(Dimension::XY, parse_multilinestring),
873 6 => impl_type_id!(Dimension::XY, parse_multipolygon),
874 11 => impl_type_id!(Dimension::XYZ, parse_point),
875 12 => impl_type_id!(Dimension::XYZ, parse_linestring),
876 13 => impl_type_id!(Dimension::XYZ, parse_polygon),
877 14 => impl_type_id!(Dimension::XYZ, parse_multipoint),
878 15 => impl_type_id!(Dimension::XYZ, parse_multilinestring),
879 16 => impl_type_id!(Dimension::XYZ, parse_multipolygon),
880 21 => impl_type_id!(Dimension::XYM, parse_point),
881 22 => impl_type_id!(Dimension::XYM, parse_linestring),
882 23 => impl_type_id!(Dimension::XYM, parse_polygon),
883 24 => impl_type_id!(Dimension::XYM, parse_multipoint),
884 25 => impl_type_id!(Dimension::XYM, parse_multilinestring),
885 26 => impl_type_id!(Dimension::XYM, parse_multipolygon),
886 31 => impl_type_id!(Dimension::XYZM, parse_point),
887 32 => impl_type_id!(Dimension::XYZM, parse_linestring),
888 33 => impl_type_id!(Dimension::XYZM, parse_polygon),
889 34 => impl_type_id!(Dimension::XYZM, parse_multipoint),
890 35 => impl_type_id!(Dimension::XYZM, parse_multilinestring),
891 36 => impl_type_id!(Dimension::XYZM, parse_multipolygon),
892 id => {
893 return Err(ArrowError::SchemaError(format!(
894 "Unexpected type id parsing mixed: {id}"
895 )));
896 }
897 };
898 Ok::<_, ArrowError>(())
899 })?;
900
901 if coord_types.len() > 1 {
902 return Err(ArrowError::SchemaError(
903 "Multi coord types in union".to_string(),
904 ));
905 }
906 if dimensions.len() > 1 {
907 return Err(ArrowError::SchemaError(
908 "Multi dimensions types in union".to_string(),
909 ));
910 }
911
912 let coord_type = coord_types.drain().next().unwrap();
913 let dimension = dimensions.drain().next().unwrap();
914 Ok((coord_type, dimension))
915 }
916 dt => Err(ArrowError::SchemaError(format!(
917 "Unexpected mixed data type: {dt}"
918 ))),
919 }
920}
921
922fn parse_geometry_collection(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
923 match data_type {
926 DataType::List(inner_field) | DataType::LargeList(inner_field) => {
927 parse_mixed(inner_field.data_type())
928 }
929 dt => Err(ArrowError::SchemaError(format!(
930 "Unexpected geometry collection data type: {dt}"
931 ))),
932 }
933}
934
935#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
940pub struct GeometryType {
941 coord_type: CoordType,
942 metadata: Arc<Metadata>,
943}
944
945impl GeometryType {
946 pub fn new(metadata: Arc<Metadata>) -> Self {
948 Self {
949 coord_type: Default::default(),
950 metadata,
951 }
952 }
953
954 pub fn with_coord_type(self, coord_type: CoordType) -> Self {
956 Self { coord_type, ..self }
957 }
958
959 pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
961 Self { metadata, ..self }
962 }
963
964 pub fn coord_type(&self) -> CoordType {
966 self.coord_type
967 }
968
969 pub fn metadata(&self) -> &Arc<Metadata> {
971 &self.metadata
972 }
973
974 pub fn data_type(&self) -> DataType {
976 let mut fields = vec![];
977 let type_ids = vec![
978 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 31, 32,
979 33, 34, 35, 36, 37,
980 ];
981
982 macro_rules! push_field {
985 ($field_name:literal, $geom_type:ident, $dim:path) => {{
986 fields.push(Field::new(
987 $field_name,
988 $geom_type {
989 coord_type: self.coord_type,
990 dim: $dim,
991 metadata: Metadata::default().into(),
992 }
993 .data_type(),
994 true,
995 ));
996 }};
997 }
998
999 push_field!("Point", PointType, Dimension::XY);
1000 push_field!("LineString", LineStringType, Dimension::XY);
1001 push_field!("Polygon", PolygonType, Dimension::XY);
1002 push_field!("MultiPoint", MultiPointType, Dimension::XY);
1003 push_field!("MultiLineString", MultiLineStringType, Dimension::XY);
1004 push_field!("MultiPolygon", MultiPolygonType, Dimension::XY);
1005 push_field!("GeometryCollection", GeometryCollectionType, Dimension::XY);
1006
1007 push_field!("Point Z", PointType, Dimension::XYZ);
1008 push_field!("LineString Z", LineStringType, Dimension::XYZ);
1009 push_field!("Polygon Z", PolygonType, Dimension::XYZ);
1010 push_field!("MultiPoint Z", MultiPointType, Dimension::XYZ);
1011 push_field!("MultiLineString Z", MultiLineStringType, Dimension::XYZ);
1012 push_field!("MultiPolygon Z", MultiPolygonType, Dimension::XYZ);
1013 push_field!(
1014 "GeometryCollection Z",
1015 GeometryCollectionType,
1016 Dimension::XYZ
1017 );
1018
1019 push_field!("Point M", PointType, Dimension::XYM);
1020 push_field!("LineString M", LineStringType, Dimension::XYM);
1021 push_field!("Polygon M", PolygonType, Dimension::XYM);
1022 push_field!("MultiPoint M", MultiPointType, Dimension::XYM);
1023 push_field!("MultiLineString M", MultiLineStringType, Dimension::XYM);
1024 push_field!("MultiPolygon M", MultiPolygonType, Dimension::XYM);
1025 push_field!(
1026 "GeometryCollection M",
1027 GeometryCollectionType,
1028 Dimension::XYM
1029 );
1030
1031 push_field!("Point ZM", PointType, Dimension::XYZM);
1032 push_field!("LineString ZM", LineStringType, Dimension::XYZM);
1033 push_field!("Polygon ZM", PolygonType, Dimension::XYZM);
1034 push_field!("MultiPoint ZM", MultiPointType, Dimension::XYZM);
1035 push_field!("MultiLineString ZM", MultiLineStringType, Dimension::XYZM);
1036 push_field!("MultiPolygon ZM", MultiPolygonType, Dimension::XYZM);
1037 push_field!(
1038 "GeometryCollection ZM",
1039 GeometryCollectionType,
1040 Dimension::XYZM
1041 );
1042
1043 let union_fields = UnionFields::new(type_ids, fields);
1044 DataType::Union(union_fields, UnionMode::Dense)
1045 }
1046
1047 pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
1049 Field::new(name, self.data_type(), nullable).with_extension_type(self.clone())
1050 }
1051}
1052
1053impl ExtensionType for GeometryType {
1054 const NAME: &'static str = "geoarrow.geometry";
1055
1056 type Metadata = Arc<Metadata>;
1057
1058 fn metadata(&self) -> &Self::Metadata {
1059 self.metadata()
1060 }
1061
1062 fn serialize_metadata(&self) -> Option<String> {
1063 self.metadata.serialize()
1064 }
1065
1066 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1067 Ok(Arc::new(Metadata::deserialize(metadata)?))
1068 }
1069
1070 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1071 let coord_type = parse_geometry(data_type)?;
1072 if coord_type != self.coord_type {
1073 return Err(ArrowError::SchemaError(format!(
1074 "Expected coordinate type {:?}, but got {:?}",
1075 self.coord_type, coord_type
1076 )));
1077 }
1078 Ok(())
1079 }
1080
1081 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1082 let coord_type = parse_geometry(data_type)?;
1083 Ok(Self {
1084 coord_type,
1085 metadata,
1086 })
1087 }
1088}
1089
1090fn parse_geometry(data_type: &DataType) -> Result<CoordType, ArrowError> {
1091 if let DataType::Union(fields, _mode) = data_type {
1092 let mut coord_types: HashSet<CoordType> = HashSet::new();
1093
1094 fields.iter().try_for_each(|(type_id, field)| {
1096 macro_rules! impl_type_id {
1097 ($expected_dim:path, $parse_fn:ident) => {{
1098 let (ct, dim) = $parse_fn(field.data_type())?;
1099 coord_types.insert(ct);
1100 assert!(matches!(dim, $expected_dim));
1101 }};
1102 }
1103
1104 match type_id {
1105 1 => impl_type_id!(Dimension::XY, parse_point),
1106 2 => impl_type_id!(Dimension::XY, parse_linestring),
1107 3 => impl_type_id!(Dimension::XY, parse_polygon),
1108 4 => impl_type_id!(Dimension::XY, parse_multipoint),
1109 5 => impl_type_id!(Dimension::XY, parse_multilinestring),
1110 6 => impl_type_id!(Dimension::XY, parse_multipolygon),
1111 7 => impl_type_id!(Dimension::XY, parse_geometry_collection),
1112 11 => impl_type_id!(Dimension::XYZ, parse_point),
1113 12 => impl_type_id!(Dimension::XYZ, parse_linestring),
1114 13 => impl_type_id!(Dimension::XYZ, parse_polygon),
1115 14 => impl_type_id!(Dimension::XYZ, parse_multipoint),
1116 15 => impl_type_id!(Dimension::XYZ, parse_multilinestring),
1117 16 => impl_type_id!(Dimension::XYZ, parse_multipolygon),
1118 17 => impl_type_id!(Dimension::XYZ, parse_geometry_collection),
1119 21 => impl_type_id!(Dimension::XYM, parse_point),
1120 22 => impl_type_id!(Dimension::XYM, parse_linestring),
1121 23 => impl_type_id!(Dimension::XYM, parse_polygon),
1122 24 => impl_type_id!(Dimension::XYM, parse_multipoint),
1123 25 => impl_type_id!(Dimension::XYM, parse_multilinestring),
1124 26 => impl_type_id!(Dimension::XYM, parse_multipolygon),
1125 27 => impl_type_id!(Dimension::XYM, parse_geometry_collection),
1126 31 => impl_type_id!(Dimension::XYZM, parse_point),
1127 32 => impl_type_id!(Dimension::XYZM, parse_linestring),
1128 33 => impl_type_id!(Dimension::XYZM, parse_polygon),
1129 34 => impl_type_id!(Dimension::XYZM, parse_multipoint),
1130 35 => impl_type_id!(Dimension::XYZM, parse_multilinestring),
1131 36 => impl_type_id!(Dimension::XYZM, parse_multipolygon),
1132 37 => impl_type_id!(Dimension::XYZM, parse_geometry_collection),
1133 id => {
1134 return Err(ArrowError::SchemaError(format!(
1135 "Unexpected type id parsing geometry: {id}"
1136 )));
1137 }
1138 };
1139 Ok::<_, ArrowError>(())
1140 })?;
1141
1142 if coord_types.len() > 1 {
1143 return Err(ArrowError::SchemaError(
1144 "Multi coord types in union".to_string(),
1145 ));
1146 }
1147
1148 let coord_type = coord_types.drain().next().unwrap();
1149 Ok(coord_type)
1150 } else {
1151 Err(ArrowError::SchemaError("Expected union type".to_string()))
1152 }
1153}
1154
1155#[derive(Debug, Clone, PartialEq, Eq, Hash)]
1160pub struct BoxType {
1161 dim: Dimension,
1162 metadata: Arc<Metadata>,
1163}
1164
1165impl BoxType {
1166 pub fn new(dim: Dimension, metadata: Arc<Metadata>) -> Self {
1168 Self { dim, metadata }
1169 }
1170
1171 pub fn with_dimension(self, dim: Dimension) -> Self {
1173 Self { dim, ..self }
1174 }
1175
1176 pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1178 Self { metadata, ..self }
1179 }
1180
1181 pub fn dimension(&self) -> Dimension {
1183 self.dim
1184 }
1185
1186 pub fn metadata(&self) -> &Arc<Metadata> {
1188 &self.metadata
1189 }
1190
1191 pub fn data_type(&self) -> DataType {
1215 let values_fields = match self.dim {
1216 Dimension::XY => {
1217 vec![
1218 Field::new("xmin", DataType::Float64, false),
1219 Field::new("ymin", DataType::Float64, false),
1220 Field::new("xmax", DataType::Float64, false),
1221 Field::new("ymax", DataType::Float64, false),
1222 ]
1223 }
1224 Dimension::XYZ => {
1225 vec![
1226 Field::new("xmin", DataType::Float64, false),
1227 Field::new("ymin", DataType::Float64, false),
1228 Field::new("zmin", DataType::Float64, false),
1229 Field::new("xmax", DataType::Float64, false),
1230 Field::new("ymax", DataType::Float64, false),
1231 Field::new("zmax", DataType::Float64, false),
1232 ]
1233 }
1234 Dimension::XYM => {
1235 vec![
1236 Field::new("xmin", DataType::Float64, false),
1237 Field::new("ymin", DataType::Float64, false),
1238 Field::new("mmin", DataType::Float64, false),
1239 Field::new("xmax", DataType::Float64, false),
1240 Field::new("ymax", DataType::Float64, false),
1241 Field::new("mmax", DataType::Float64, false),
1242 ]
1243 }
1244 Dimension::XYZM => {
1245 vec![
1246 Field::new("xmin", DataType::Float64, false),
1247 Field::new("ymin", DataType::Float64, false),
1248 Field::new("zmin", DataType::Float64, false),
1249 Field::new("mmin", DataType::Float64, false),
1250 Field::new("xmax", DataType::Float64, false),
1251 Field::new("ymax", DataType::Float64, false),
1252 Field::new("zmax", DataType::Float64, false),
1253 Field::new("mmax", DataType::Float64, false),
1254 ]
1255 }
1256 };
1257 DataType::Struct(values_fields.into())
1258 }
1259
1260 pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
1262 Field::new(name, self.data_type(), nullable).with_extension_type(self.clone())
1263 }
1264}
1265
1266impl ExtensionType for BoxType {
1267 const NAME: &'static str = "geoarrow.box";
1268
1269 type Metadata = Arc<Metadata>;
1270
1271 fn metadata(&self) -> &Self::Metadata {
1272 self.metadata()
1273 }
1274
1275 fn serialize_metadata(&self) -> Option<String> {
1276 self.metadata.serialize()
1277 }
1278
1279 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1280 Ok(Arc::new(Metadata::deserialize(metadata)?))
1281 }
1282
1283 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1284 let dim = parse_box(data_type)?;
1285 if dim != self.dim {
1286 return Err(ArrowError::SchemaError(format!(
1287 "Expected dimension {:?}, but got {:?}",
1288 self.dim, dim
1289 )));
1290 }
1291 Ok(())
1292 }
1293
1294 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1295 let dim = parse_box(data_type)?;
1296 Ok(Self { dim, metadata })
1297 }
1298}
1299
1300fn parse_box(data_type: &DataType) -> Result<Dimension, ArrowError> {
1301 match data_type {
1302 DataType::Struct(struct_fields) => match struct_fields.len() {
1303 4 => Ok(Dimension::XY),
1304 6 => {
1305 let names: HashSet<&str> =
1306 struct_fields.iter().map(|f| f.name().as_str()).collect();
1307 if names.contains("mmin") && names.contains("mmax") {
1308 Ok(Dimension::XYM)
1309 } else if names.contains("zmin") && names.contains("zmax") {
1310 Ok(Dimension::XYZ)
1311 } else {
1312 Err(ArrowError::SchemaError(format!(
1313 "unexpected either mmin and mmax or zmin and zmax for struct with 6 fields. Got names: {names:?}",
1314 )))
1315 }
1316 }
1317 8 => Ok(Dimension::XYZM),
1318 num_fields => Err(ArrowError::SchemaError(format!(
1319 "unexpected number of struct fields: {num_fields}",
1320 ))),
1321 },
1322 dt => Err(ArrowError::SchemaError(format!(
1323 "unexpected data type parsing box: {dt:?}",
1324 ))),
1325 }
1326}
1327
/// Alias of [`BoxType`]; both names refer to the same type.
pub type RectType = BoxType;
1334
1335#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
1340pub struct WkbType {
1341 metadata: Arc<Metadata>,
1342}
1343
1344impl WkbType {
1345 pub fn new(metadata: Arc<Metadata>) -> Self {
1347 Self { metadata }
1348 }
1349
1350 pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1352 Self { metadata }
1353 }
1354
1355 pub fn metadata(&self) -> &Arc<Metadata> {
1357 &self.metadata
1358 }
1359}
1360
1361impl ExtensionType for WkbType {
1362 const NAME: &'static str = "geoarrow.wkb";
1363
1364 type Metadata = Arc<Metadata>;
1365
1366 fn metadata(&self) -> &Self::Metadata {
1367 self.metadata()
1368 }
1369
1370 fn serialize_metadata(&self) -> Option<String> {
1371 self.metadata.serialize()
1372 }
1373
1374 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1375 Ok(Arc::new(Metadata::deserialize(metadata)?))
1376 }
1377
1378 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1379 match data_type {
1380 DataType::Binary | DataType::LargeBinary | DataType::BinaryView => Ok(()),
1381 dt => Err(ArrowError::SchemaError(format!(
1382 "Unexpected data type {dt}"
1383 ))),
1384 }
1385 }
1386
1387 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1388 let wkb = Self { metadata };
1389 wkb.supports_data_type(data_type)?;
1390 Ok(wkb)
1391 }
1392}
1393
1394#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
1399pub struct WktType {
1400 metadata: Arc<Metadata>,
1401}
1402
1403impl WktType {
1404 pub fn new(metadata: Arc<Metadata>) -> Self {
1406 Self { metadata }
1407 }
1408
1409 pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1411 Self { metadata }
1412 }
1413
1414 pub fn metadata(&self) -> &Arc<Metadata> {
1416 &self.metadata
1417 }
1418}
1419
1420impl ExtensionType for WktType {
1421 const NAME: &'static str = "geoarrow.wkt";
1422
1423 type Metadata = Arc<Metadata>;
1424
1425 fn metadata(&self) -> &Self::Metadata {
1426 self.metadata()
1427 }
1428
1429 fn serialize_metadata(&self) -> Option<String> {
1430 self.metadata.serialize()
1431 }
1432
1433 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1434 Ok(Arc::new(Metadata::deserialize(metadata)?))
1435 }
1436
1437 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1438 match data_type {
1439 DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Ok(()),
1440 dt => Err(ArrowError::SchemaError(format!(
1441 "Unexpected data type {dt}"
1442 ))),
1443 }
1444 }
1445
1446 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1447 let wkb = Self { metadata };
1448 wkb.supports_data_type(data_type)?;
1449 Ok(wkb)
1450 }
1451}
1452
1453fn coord_type_to_data_type(coord_type: CoordType, dim: Dimension) -> DataType {
1454 match (coord_type, dim) {
1455 (CoordType::Interleaved, Dimension::XY) => {
1456 let values_field = Field::new("xy", DataType::Float64, false);
1457 DataType::FixedSizeList(Arc::new(values_field), 2)
1458 }
1459 (CoordType::Interleaved, Dimension::XYZ) => {
1460 let values_field = Field::new("xyz", DataType::Float64, false);
1461 DataType::FixedSizeList(Arc::new(values_field), 3)
1462 }
1463 (CoordType::Interleaved, Dimension::XYM) => {
1464 let values_field = Field::new("xym", DataType::Float64, false);
1465 DataType::FixedSizeList(Arc::new(values_field), 3)
1466 }
1467 (CoordType::Interleaved, Dimension::XYZM) => {
1468 let values_field = Field::new("xyzm", DataType::Float64, false);
1469 DataType::FixedSizeList(Arc::new(values_field), 4)
1470 }
1471 (CoordType::Separated, Dimension::XY) => {
1472 let values_fields = vec![
1473 Field::new("x", DataType::Float64, false),
1474 Field::new("y", DataType::Float64, false),
1475 ];
1476 DataType::Struct(values_fields.into())
1477 }
1478 (CoordType::Separated, Dimension::XYZ) => {
1479 let values_fields = vec![
1480 Field::new("x", DataType::Float64, false),
1481 Field::new("y", DataType::Float64, false),
1482 Field::new("z", DataType::Float64, false),
1483 ];
1484 DataType::Struct(values_fields.into())
1485 }
1486 (CoordType::Separated, Dimension::XYM) => {
1487 let values_fields = vec![
1488 Field::new("x", DataType::Float64, false),
1489 Field::new("y", DataType::Float64, false),
1490 Field::new("m", DataType::Float64, false),
1491 ];
1492 DataType::Struct(values_fields.into())
1493 }
1494 (CoordType::Separated, Dimension::XYZM) => {
1495 let values_fields = vec![
1496 Field::new("x", DataType::Float64, false),
1497 Field::new("y", DataType::Float64, false),
1498 Field::new("z", DataType::Float64, false),
1499 Field::new("m", DataType::Float64, false),
1500 ];
1501 DataType::Struct(values_fields.into())
1502 }
1503 }
1504}
1505
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_schema::{DataType, Field};

    use super::*;
    use crate::crs::Crs;
    use crate::edges::Edges;

    /// Storage type of an interleaved XY point: a fixed-size list of two non-nullable floats.
    fn interleaved_xy() -> DataType {
        let values = Field::new("xy", DataType::Float64, false);
        DataType::FixedSizeList(Arc::new(values), 2)
    }

    /// An interleaved XY storage type parses to `Interleaved` / `XY` with no serialized metadata.
    #[test]
    fn test_point_interleaved_xy() {
        let storage = interleaved_xy();
        let point_type = PointType::try_new(&storage, Arc::new(Metadata::default())).unwrap();

        assert_eq!(point_type.coord_type, CoordType::Interleaved);
        assert_eq!(point_type.dim, Dimension::XY);
        assert_eq!(point_type.serialize_metadata(), None);
    }

    /// A struct of `x`, `y`, `z` parses to `Separated` / `XYZ` with no serialized metadata.
    #[test]
    fn test_point_separated_xyz() {
        let ordinates: Vec<Field> = ["x", "y", "z"]
            .into_iter()
            .map(|name| Field::new(name, DataType::Float64, false))
            .collect();
        let storage = DataType::Struct(ordinates.into());
        let point_type = PointType::try_new(&storage, Arc::new(Metadata::default())).unwrap();

        assert_eq!(point_type.coord_type, CoordType::Separated);
        assert_eq!(point_type.dim, Dimension::XYZ);
        assert_eq!(point_type.serialize_metadata(), None);
    }

    /// Metadata with an authority-code CRS and spherical edges serializes to the expected JSON.
    #[test]
    fn test_point_metadata() {
        let storage = interleaved_xy();
        let crs = Crs::from_authority_code("EPSG:4326".to_string());
        let metadata = Arc::new(Metadata::new(crs, Some(Edges::Spherical)));
        let point_type = PointType::try_new(&storage, metadata).unwrap();

        let expected = r#"{"crs":"EPSG:4326","crs_type":"authority_code","edges":"spherical"}"#;
        let serialized = point_type.serialize_metadata();
        assert_eq!(serialized.as_deref(), Some(expected));
    }

    /// Smoke test: building the geometry-collection storage type must not panic.
    #[test]
    fn geometry_data_type() {
        let typ = GeometryCollectionType::new(Dimension::XY, Arc::default());
        dbg!(typ.data_type());
    }
}