1use std::collections::HashSet;
2use std::sync::{Arc, LazyLock};
3
4use arrow_schema::extension::ExtensionType;
5use arrow_schema::{ArrowError, DataType, Field, UnionFields, UnionMode};
6
7use crate::error::GeoArrowError;
8use crate::metadata::Metadata;
9use crate::{CoordType, Dimension};
10
/// Generates a geometry type struct that stores a coordinate type, a
/// dimension and shared metadata, along with its constructor, builder-style
/// setters, accessors and conversions.
///
/// Attributes (including doc comments) written before the struct name in the
/// invocation are forwarded onto the generated struct.
///
/// The generated `to_field` calls `self.data_type()` and attaches a clone of
/// `self` as an extension type, so every generated type must separately
/// provide a `data_type` method and an `ExtensionType` implementation.
macro_rules! define_basic_type {
    (
        $(#[$($attrss:meta)*])*
        $struct_name:ident
    ) => {
        $(#[$($attrss)*])*
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        pub struct $struct_name {
            coord_type: CoordType,
            dim: Dimension,
            metadata: Arc<Metadata>,
        }

        impl $struct_name {
            /// Creates the type with the given dimension and metadata, using
            /// the default coordinate type.
            pub fn new(dim: Dimension, metadata: Arc<Metadata>) -> Self {
                let coord_type = CoordType::default();
                Self {
                    coord_type,
                    dim,
                    metadata,
                }
            }

            /// Returns this type with its coordinate type replaced.
            pub fn with_coord_type(mut self, coord_type: CoordType) -> Self {
                self.coord_type = coord_type;
                self
            }

            /// Returns this type with its dimension replaced.
            pub fn with_dimension(mut self, dim: Dimension) -> Self {
                self.dim = dim;
                self
            }

            /// Returns this type with its metadata replaced.
            pub fn with_metadata(mut self, metadata: Arc<Metadata>) -> Self {
                self.metadata = metadata;
                self
            }

            /// The coordinate type stored in this type.
            pub fn coord_type(&self) -> CoordType {
                let Self { coord_type, .. } = self;
                *coord_type
            }

            /// The dimension stored in this type.
            pub fn dimension(&self) -> Dimension {
                let Self { dim, .. } = self;
                *dim
            }

            /// A reference to the shared metadata stored in this type.
            pub fn metadata(&self) -> &Arc<Metadata> {
                let Self { metadata, .. } = self;
                metadata
            }

            /// Builds a [`Field`] named `name` whose data type is
            /// `self.data_type()` and which carries a clone of this type as
            /// its extension type.
            pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
                let storage = self.data_type();
                let extension = self.clone();
                Field::new(name, storage, nullable).with_extension_type(extension)
            }

            /// Consumes the type, yielding its coordinate type, dimension and
            /// metadata in that order.
            pub fn into_inner(self) -> (CoordType, Dimension, Arc<Metadata>) {
                let Self {
                    coord_type,
                    dim,
                    metadata,
                } = self;
                (coord_type, dim, metadata)
            }
        }
    };
}
76
define_basic_type!(
    /// Geometry type whose Arrow extension name is `geoarrow.point`.
    PointType
);
define_basic_type!(
    /// Geometry type whose Arrow extension name is `geoarrow.linestring`.
    LineStringType
);
define_basic_type!(
    /// Geometry type whose Arrow extension name is `geoarrow.polygon`.
    PolygonType
);
define_basic_type!(
    /// Geometry type whose Arrow extension name is `geoarrow.multipoint`.
    MultiPointType
);
define_basic_type!(
    /// Geometry type whose Arrow extension name is `geoarrow.multilinestring`.
    MultiLineStringType
);
define_basic_type!(
    /// Geometry type whose Arrow extension name is `geoarrow.multipolygon`.
    MultiPolygonType
);
define_basic_type!(
    /// Geometry type whose Arrow extension name is
    /// `geoarrow.geometrycollection`.
    GeometryCollectionType
);
126
127impl PointType {
128 pub fn data_type(&self) -> DataType {
140 coord_type_to_data_type(self.coord_type, self.dim)
141 }
142}
143
144impl ExtensionType for PointType {
145 const NAME: &'static str = "geoarrow.point";
146
147 type Metadata = Arc<Metadata>;
148
149 fn metadata(&self) -> &Self::Metadata {
150 self.metadata()
151 }
152
153 fn serialize_metadata(&self) -> Option<String> {
154 self.metadata.serialize()
155 }
156
157 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
158 Ok(Arc::new(Metadata::deserialize(metadata)?))
159 }
160
161 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
162 let (coord_type, dim) = parse_point(data_type)?;
163 if coord_type != self.coord_type {
164 return Err(ArrowError::SchemaError(format!(
165 "Expected coordinate type {:?}, but got {:?}",
166 self.coord_type, coord_type
167 )));
168 }
169 if dim != self.dim {
170 return Err(ArrowError::SchemaError(format!(
171 "Expected dimension {:?}, but got {:?}",
172 self.dim, dim
173 )));
174 }
175 Ok(())
176 }
177
178 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
179 let (coord_type, dim) = parse_point(data_type)?;
180 Ok(Self {
181 coord_type,
182 dim,
183 metadata,
184 })
185 }
186}
187
188fn parse_point(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
189 match data_type {
190 DataType::FixedSizeList(inner_field, list_size) => {
191 let dim_parsed_from_field = Dimension::from_interleaved_field(inner_field)?;
192 if dim_parsed_from_field.size() != *list_size as usize {
193 Err(GeoArrowError::InvalidGeoArrow(format!(
194 "Field metadata suggests list of size {}, but list size is {}",
195 dim_parsed_from_field.size(),
196 list_size
197 ))
198 .into())
199 } else {
200 Ok((CoordType::Interleaved, dim_parsed_from_field))
201 }
202 }
203 DataType::Struct(struct_fields) => Ok((
204 CoordType::Separated,
205 Dimension::from_separated_field(struct_fields)?,
206 )),
207 dt => Err(ArrowError::SchemaError(format!(
208 "Unexpected data type {dt}"
209 ))),
210 }
211}
212
213impl LineStringType {
214 pub fn data_type(&self) -> DataType {
232 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
233 let vertices_field = Field::new("vertices", coords_type, false).into();
234 DataType::List(vertices_field)
235 }
236}
237
238impl ExtensionType for LineStringType {
239 const NAME: &'static str = "geoarrow.linestring";
240
241 type Metadata = Arc<Metadata>;
242
243 fn metadata(&self) -> &Self::Metadata {
244 self.metadata()
245 }
246
247 fn serialize_metadata(&self) -> Option<String> {
248 self.metadata.serialize()
249 }
250
251 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
252 Ok(Arc::new(Metadata::deserialize(metadata)?))
253 }
254
255 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
256 let (coord_type, dim) = parse_linestring(data_type)?;
257 if coord_type != self.coord_type {
258 return Err(ArrowError::SchemaError(format!(
259 "Expected coordinate type {:?}, but got {:?}",
260 self.coord_type, coord_type
261 )));
262 }
263 if dim != self.dim {
264 return Err(ArrowError::SchemaError(format!(
265 "Expected dimension {:?}, but got {:?}",
266 self.dim, dim
267 )));
268 }
269 Ok(())
270 }
271
272 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
273 let (coord_type, dim) = parse_linestring(data_type)?;
274 Ok(Self {
275 coord_type,
276 dim,
277 metadata,
278 })
279 }
280}
281
282fn parse_linestring(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
283 match data_type {
284 DataType::List(inner_field) | DataType::LargeList(inner_field) => {
285 parse_point(inner_field.data_type())
286 }
287 dt => Err(ArrowError::SchemaError(format!(
288 "Unexpected data type {dt}"
289 ))),
290 }
291}
292
293impl PolygonType {
294 pub fn data_type(&self) -> DataType {
316 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
317 let vertices_field = Field::new("vertices", coords_type, false);
318 let rings_field = Field::new_list("rings", vertices_field, false).into();
319 DataType::List(rings_field)
320 }
321}
322
323impl ExtensionType for PolygonType {
324 const NAME: &'static str = "geoarrow.polygon";
325
326 type Metadata = Arc<Metadata>;
327
328 fn metadata(&self) -> &Self::Metadata {
329 self.metadata()
330 }
331
332 fn serialize_metadata(&self) -> Option<String> {
333 self.metadata.serialize()
334 }
335
336 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
337 Ok(Arc::new(Metadata::deserialize(metadata)?))
338 }
339
340 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
341 let (coord_type, dim) = parse_polygon(data_type)?;
342 if coord_type != self.coord_type {
343 return Err(ArrowError::SchemaError(format!(
344 "Expected coordinate type {:?}, but got {:?}",
345 self.coord_type, coord_type
346 )));
347 }
348 if dim != self.dim {
349 return Err(ArrowError::SchemaError(format!(
350 "Expected dimension {:?}, but got {:?}",
351 self.dim, dim
352 )));
353 }
354 Ok(())
355 }
356
357 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
358 let (coord_type, dim) = parse_polygon(data_type)?;
359 Ok(Self {
360 coord_type,
361 dim,
362 metadata,
363 })
364 }
365}
366
367fn parse_polygon(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
368 match data_type {
369 DataType::List(inner1) => match inner1.data_type() {
370 DataType::List(inner2) => parse_point(inner2.data_type()),
371 dt => Err(ArrowError::SchemaError(format!(
372 "Unexpected inner polygon data type: {dt}"
373 ))),
374 },
375 DataType::LargeList(inner1) => match inner1.data_type() {
376 DataType::LargeList(inner2) => parse_point(inner2.data_type()),
377 dt => Err(ArrowError::SchemaError(format!(
378 "Unexpected inner polygon data type: {dt}"
379 ))),
380 },
381 dt => Err(ArrowError::SchemaError(format!(
382 "Unexpected root data type parsing polygon {dt}"
383 ))),
384 }
385}
386
387impl MultiPointType {
388 pub fn data_type(&self) -> DataType {
409 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
410 let vertices_field = Field::new("points", coords_type, false).into();
411 DataType::List(vertices_field)
412 }
413}
414
415impl ExtensionType for MultiPointType {
416 const NAME: &'static str = "geoarrow.multipoint";
417
418 type Metadata = Arc<Metadata>;
419
420 fn metadata(&self) -> &Self::Metadata {
421 self.metadata()
422 }
423
424 fn serialize_metadata(&self) -> Option<String> {
425 self.metadata.serialize()
426 }
427
428 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
429 Ok(Arc::new(Metadata::deserialize(metadata)?))
430 }
431
432 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
433 let (coord_type, dim) = parse_multipoint(data_type)?;
434 if coord_type != self.coord_type {
435 return Err(ArrowError::SchemaError(format!(
436 "Expected coordinate type {:?}, but got {:?}",
437 self.coord_type, coord_type
438 )));
439 }
440 if dim != self.dim {
441 return Err(ArrowError::SchemaError(format!(
442 "Expected dimension {:?}, but got {:?}",
443 self.dim, dim
444 )));
445 }
446 Ok(())
447 }
448
449 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
450 let (coord_type, dim) = parse_multipoint(data_type)?;
451 Ok(Self {
452 coord_type,
453 dim,
454 metadata,
455 })
456 }
457}
458
459fn parse_multipoint(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
460 match data_type {
461 DataType::List(inner_field) => parse_point(inner_field.data_type()),
462 DataType::LargeList(inner_field) => parse_point(inner_field.data_type()),
463 dt => Err(ArrowError::SchemaError(format!(
464 "Unexpected data type {dt}"
465 ))),
466 }
467}
468
469impl MultiLineStringType {
470 pub fn data_type(&self) -> DataType {
493 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
494 let vertices_field = Field::new("vertices", coords_type, false);
495 let linestrings_field = Field::new_list("linestrings", vertices_field, false).into();
496 DataType::List(linestrings_field)
497 }
498}
499
500impl ExtensionType for MultiLineStringType {
501 const NAME: &'static str = "geoarrow.multilinestring";
502
503 type Metadata = Arc<Metadata>;
504
505 fn metadata(&self) -> &Self::Metadata {
506 self.metadata()
507 }
508
509 fn serialize_metadata(&self) -> Option<String> {
510 self.metadata.serialize()
511 }
512
513 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
514 Ok(Arc::new(Metadata::deserialize(metadata)?))
515 }
516
517 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
518 let (coord_type, dim) = parse_multilinestring(data_type)?;
519 if coord_type != self.coord_type {
520 return Err(ArrowError::SchemaError(format!(
521 "Expected coordinate type {:?}, but got {:?}",
522 self.coord_type, coord_type
523 )));
524 }
525 if dim != self.dim {
526 return Err(ArrowError::SchemaError(format!(
527 "Expected dimension {:?}, but got {:?}",
528 self.dim, dim
529 )));
530 }
531 Ok(())
532 }
533
534 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
535 let (coord_type, dim) = parse_multilinestring(data_type)?;
536 Ok(Self {
537 coord_type,
538 dim,
539 metadata,
540 })
541 }
542}
543
544fn parse_multilinestring(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
545 match data_type {
546 DataType::List(inner1) => match inner1.data_type() {
547 DataType::List(inner2) => parse_point(inner2.data_type()),
548 dt => Err(ArrowError::SchemaError(format!(
549 "Unexpected inner multilinestring data type: {dt}"
550 ))),
551 },
552 DataType::LargeList(inner1) => match inner1.data_type() {
553 DataType::LargeList(inner2) => parse_point(inner2.data_type()),
554 dt => Err(ArrowError::SchemaError(format!(
555 "Unexpected inner multilinestring data type: {dt}"
556 ))),
557 },
558 dt => Err(ArrowError::SchemaError(format!(
559 "Unexpected data type parsing multilinestring: {dt}"
560 ))),
561 }
562}
563
564impl MultiPolygonType {
565 pub fn data_type(&self) -> DataType {
588 let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
589 let vertices_field = Field::new("vertices", coords_type, false);
590 let rings_field = Field::new_list("rings", vertices_field, false);
591 let polygons_field = Field::new_list("polygons", rings_field, false).into();
592 DataType::List(polygons_field)
593 }
594}
595
596impl ExtensionType for MultiPolygonType {
597 const NAME: &'static str = "geoarrow.multipolygon";
598
599 type Metadata = Arc<Metadata>;
600
601 fn metadata(&self) -> &Self::Metadata {
602 self.metadata()
603 }
604
605 fn serialize_metadata(&self) -> Option<String> {
606 self.metadata.serialize()
607 }
608
609 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
610 Ok(Arc::new(Metadata::deserialize(metadata)?))
611 }
612
613 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
614 let (coord_type, dim) = parse_multipolygon(data_type)?;
615 if coord_type != self.coord_type {
616 return Err(ArrowError::SchemaError(format!(
617 "Expected coordinate type {:?}, but got {:?}",
618 self.coord_type, coord_type
619 )));
620 }
621 if dim != self.dim {
622 return Err(ArrowError::SchemaError(format!(
623 "Expected dimension {:?}, but got {:?}",
624 self.dim, dim
625 )));
626 }
627 Ok(())
628 }
629
630 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
631 let (coord_type, dim) = parse_multipolygon(data_type)?;
632 Ok(Self {
633 coord_type,
634 dim,
635 metadata,
636 })
637 }
638}
639
640fn parse_multipolygon(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
641 match data_type {
642 DataType::List(inner1) => match inner1.data_type() {
643 DataType::List(inner2) => match inner2.data_type() {
644 DataType::List(inner3) => parse_point(inner3.data_type()),
645 dt => Err(ArrowError::SchemaError(format!(
646 "Unexpected inner2 multipolygon data type: {dt}"
647 ))),
648 },
649 dt => Err(ArrowError::SchemaError(format!(
650 "Unexpected inner1 multipolygon data type: {dt}"
651 ))),
652 },
653 DataType::LargeList(inner1) => match inner1.data_type() {
654 DataType::LargeList(inner2) => match inner2.data_type() {
655 DataType::LargeList(inner3) => parse_point(inner3.data_type()),
656 dt => Err(ArrowError::SchemaError(format!(
657 "Unexpected inner2 multipolygon data type: {dt}"
658 ))),
659 },
660 dt => Err(ArrowError::SchemaError(format!(
661 "Unexpected inner1 multipolygon data type: {dt}"
662 ))),
663 },
664 dt => Err(ArrowError::SchemaError(format!(
665 "Unexpected data type {dt}"
666 ))),
667 }
668}
669
670impl GeometryCollectionType {
671 pub fn data_type(&self) -> DataType {
729 let geometries_field = Field::new(
730 "geometries",
731 mixed_data_type(self.coord_type, self.dim),
732 false,
733 )
734 .into();
735 DataType::List(geometries_field)
736 }
737}
738
739fn mixed_data_type(coord_type: CoordType, dim: Dimension) -> DataType {
740 let mut fields = vec![];
741 let mut type_ids = vec![];
742
743 match dim {
744 Dimension::XY => type_ids.extend([1, 2, 3, 4, 5, 6]),
745 Dimension::XYZ => type_ids.extend([11, 12, 13, 14, 15, 16]),
746 Dimension::XYM => type_ids.extend([21, 22, 23, 24, 25, 26]),
747 Dimension::XYZM => type_ids.extend([31, 32, 33, 34, 35, 36]),
748 }
749
750 macro_rules! push_field {
753 ($field_name:literal, $geom_type:ident) => {{
754 fields.push(Field::new(
755 $field_name,
756 $geom_type {
757 coord_type,
758 dim,
759 metadata: Metadata::default().into(),
760 }
761 .data_type(),
762 true,
763 ));
764 }};
765 }
766
767 match dim {
768 Dimension::XY => {
769 push_field!("Point", PointType);
770 push_field!("LineString", LineStringType);
771 push_field!("Polygon", PolygonType);
772 push_field!("MultiPoint", MultiPointType);
773 push_field!("MultiLineString", MultiLineStringType);
774 push_field!("MultiPolygon", MultiPolygonType);
775 }
776 Dimension::XYZ => {
777 push_field!("Point Z", PointType);
778 push_field!("LineString Z", LineStringType);
779 push_field!("Polygon Z", PolygonType);
780 push_field!("MultiPoint Z", MultiPointType);
781 push_field!("MultiLineString Z", MultiLineStringType);
782 push_field!("MultiPolygon Z", MultiPolygonType);
783 }
784 Dimension::XYM => {
785 push_field!("Point M", PointType);
786 push_field!("LineString M", LineStringType);
787 push_field!("Polygon M", PolygonType);
788 push_field!("MultiPoint M", MultiPointType);
789 push_field!("MultiLineString M", MultiLineStringType);
790 push_field!("MultiPolygon M", MultiPolygonType);
791 }
792 Dimension::XYZM => {
793 push_field!("Point ZM", PointType);
794 push_field!("LineString ZM", LineStringType);
795 push_field!("Polygon ZM", PolygonType);
796 push_field!("MultiPoint ZM", MultiPointType);
797 push_field!("MultiLineString ZM", MultiLineStringType);
798 push_field!("MultiPolygon ZM", MultiPolygonType);
799 }
800 }
801
802 let union_fields = UnionFields::try_new(type_ids, fields)
803 .expect("type_ids and fields should have the same length");
804 DataType::Union(union_fields, UnionMode::Dense)
805}
806
807impl ExtensionType for GeometryCollectionType {
808 const NAME: &'static str = "geoarrow.geometrycollection";
809
810 type Metadata = Arc<Metadata>;
811
812 fn metadata(&self) -> &Self::Metadata {
813 self.metadata()
814 }
815
816 fn serialize_metadata(&self) -> Option<String> {
817 self.metadata.serialize()
818 }
819
820 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
821 Ok(Arc::new(Metadata::deserialize(metadata)?))
822 }
823
824 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
825 let (coord_type, dim) = parse_geometry_collection(data_type)?;
826 if coord_type != self.coord_type {
827 return Err(ArrowError::SchemaError(format!(
828 "Expected coordinate type {:?}, but got {:?}",
829 self.coord_type, coord_type
830 )));
831 }
832 if dim != self.dim {
833 return Err(ArrowError::SchemaError(format!(
834 "Expected dimension {:?}, but got {:?}",
835 self.dim, dim
836 )));
837 }
838 Ok(())
839 }
840
841 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
842 let (coord_type, dim) = parse_geometry_collection(data_type)?;
843 Ok(Self {
844 coord_type,
845 dim,
846 metadata,
847 })
848 }
849}
850
851fn parse_mixed(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
852 match data_type {
853 DataType::Union(fields, _) => {
854 let mut coord_types: HashSet<CoordType> = HashSet::new();
855 let mut dimensions: HashSet<Dimension> = HashSet::new();
856
857 fields.iter().try_for_each(|(type_id, field)| {
859 macro_rules! impl_type_id {
860 ($expected_dim:path, $parse_fn:ident) => {{
861 let (ct, dim) = $parse_fn(field.data_type())?;
862 coord_types.insert(ct);
863 assert!(matches!(dim, $expected_dim));
864 dimensions.insert(dim);
865 }};
866 }
867
868 match type_id {
869 1 => impl_type_id!(Dimension::XY, parse_point),
870 2 => impl_type_id!(Dimension::XY, parse_linestring),
871 3 => impl_type_id!(Dimension::XY, parse_polygon),
872 4 => impl_type_id!(Dimension::XY, parse_multipoint),
873 5 => impl_type_id!(Dimension::XY, parse_multilinestring),
874 6 => impl_type_id!(Dimension::XY, parse_multipolygon),
875 11 => impl_type_id!(Dimension::XYZ, parse_point),
876 12 => impl_type_id!(Dimension::XYZ, parse_linestring),
877 13 => impl_type_id!(Dimension::XYZ, parse_polygon),
878 14 => impl_type_id!(Dimension::XYZ, parse_multipoint),
879 15 => impl_type_id!(Dimension::XYZ, parse_multilinestring),
880 16 => impl_type_id!(Dimension::XYZ, parse_multipolygon),
881 21 => impl_type_id!(Dimension::XYM, parse_point),
882 22 => impl_type_id!(Dimension::XYM, parse_linestring),
883 23 => impl_type_id!(Dimension::XYM, parse_polygon),
884 24 => impl_type_id!(Dimension::XYM, parse_multipoint),
885 25 => impl_type_id!(Dimension::XYM, parse_multilinestring),
886 26 => impl_type_id!(Dimension::XYM, parse_multipolygon),
887 31 => impl_type_id!(Dimension::XYZM, parse_point),
888 32 => impl_type_id!(Dimension::XYZM, parse_linestring),
889 33 => impl_type_id!(Dimension::XYZM, parse_polygon),
890 34 => impl_type_id!(Dimension::XYZM, parse_multipoint),
891 35 => impl_type_id!(Dimension::XYZM, parse_multilinestring),
892 36 => impl_type_id!(Dimension::XYZM, parse_multipolygon),
893 id => {
894 return Err(ArrowError::SchemaError(format!(
895 "Unexpected type id parsing mixed: {id}"
896 )));
897 }
898 };
899 Ok::<_, ArrowError>(())
900 })?;
901
902 if coord_types.len() > 1 {
903 return Err(ArrowError::SchemaError(
904 "Multi coord types in union".to_string(),
905 ));
906 }
907 if dimensions.len() > 1 {
908 return Err(ArrowError::SchemaError(
909 "Multi dimensions types in union".to_string(),
910 ));
911 }
912
913 let coord_type = coord_types.drain().next().unwrap();
914 let dimension = dimensions.drain().next().unwrap();
915 Ok((coord_type, dimension))
916 }
917 dt => Err(ArrowError::SchemaError(format!(
918 "Unexpected mixed data type: {dt}"
919 ))),
920 }
921}
922
923fn parse_geometry_collection(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
924 match data_type {
927 DataType::List(inner_field) | DataType::LargeList(inner_field) => {
928 parse_mixed(inner_field.data_type())
929 }
930 dt => Err(ArrowError::SchemaError(format!(
931 "Unexpected geometry collection data type: {dt}"
932 ))),
933 }
934}
935
936static INTERLEAVED_XY: LazyLock<DataType> = LazyLock::new(|| {
937 let values_field = Field::new("xy", DataType::Float64, false);
938 DataType::FixedSizeList(Arc::new(values_field), 2)
939});
940
941static INTERLEAVED_XYZ: LazyLock<DataType> = LazyLock::new(|| {
942 let values_field = Field::new("xyz", DataType::Float64, false);
943 DataType::FixedSizeList(Arc::new(values_field), 3)
944});
945
946static INTERLEAVED_XYM: LazyLock<DataType> = LazyLock::new(|| {
947 let values_field = Field::new("xym", DataType::Float64, false);
948 DataType::FixedSizeList(Arc::new(values_field), 3)
949});
950
951static INTERLEAVED_XYZM: LazyLock<DataType> = LazyLock::new(|| {
952 let values_field = Field::new("xyzm", DataType::Float64, false);
953 DataType::FixedSizeList(Arc::new(values_field), 4)
954});
955
956static SEPARATED_XY: LazyLock<DataType> = LazyLock::new(|| {
957 DataType::Struct(
958 vec![
959 Field::new("x", DataType::Float64, false),
960 Field::new("y", DataType::Float64, false),
961 ]
962 .into(),
963 )
964});
965
966static SEPARATED_XYZ: LazyLock<DataType> = LazyLock::new(|| {
967 DataType::Struct(
968 vec![
969 Field::new("x", DataType::Float64, false),
970 Field::new("y", DataType::Float64, false),
971 Field::new("z", DataType::Float64, false),
972 ]
973 .into(),
974 )
975});
976
977static SEPARATED_XYM: LazyLock<DataType> = LazyLock::new(|| {
978 DataType::Struct(
979 vec![
980 Field::new("x", DataType::Float64, false),
981 Field::new("y", DataType::Float64, false),
982 Field::new("m", DataType::Float64, false),
983 ]
984 .into(),
985 )
986});
987
988static SEPARATED_XYZM: LazyLock<DataType> = LazyLock::new(|| {
989 DataType::Struct(
990 vec![
991 Field::new("x", DataType::Float64, false),
992 Field::new("y", DataType::Float64, false),
993 Field::new("z", DataType::Float64, false),
994 Field::new("m", DataType::Float64, false),
995 ]
996 .into(),
997 )
998});
999
1000#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
1005pub struct GeometryType {
1006 coord_type: CoordType,
1007 metadata: Arc<Metadata>,
1008}
1009
1010impl GeometryType {
1011 pub fn new(metadata: Arc<Metadata>) -> Self {
1013 Self {
1014 coord_type: Default::default(),
1015 metadata,
1016 }
1017 }
1018
1019 pub fn with_coord_type(self, coord_type: CoordType) -> Self {
1021 Self { coord_type, ..self }
1022 }
1023
1024 pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1026 Self { metadata, ..self }
1027 }
1028
1029 pub fn coord_type(&self) -> CoordType {
1031 self.coord_type
1032 }
1033
1034 pub fn metadata(&self) -> &Arc<Metadata> {
1036 &self.metadata
1037 }
1038
1039 pub fn data_type(&self) -> DataType {
1041 let mut fields = vec![];
1042 let type_ids = vec![
1043 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 31, 32,
1044 33, 34, 35, 36, 37,
1045 ];
1046
1047 macro_rules! push_field {
1050 ($field_name:literal, $geom_type:ident, $dim:path) => {{
1051 fields.push(Field::new(
1052 $field_name,
1053 $geom_type {
1054 coord_type: self.coord_type,
1055 dim: $dim,
1056 metadata: Metadata::default().into(),
1057 }
1058 .data_type(),
1059 true,
1060 ));
1061 }};
1062 }
1063
1064 push_field!("Point", PointType, Dimension::XY);
1065 push_field!("LineString", LineStringType, Dimension::XY);
1066 push_field!("Polygon", PolygonType, Dimension::XY);
1067 push_field!("MultiPoint", MultiPointType, Dimension::XY);
1068 push_field!("MultiLineString", MultiLineStringType, Dimension::XY);
1069 push_field!("MultiPolygon", MultiPolygonType, Dimension::XY);
1070 push_field!("GeometryCollection", GeometryCollectionType, Dimension::XY);
1071
1072 push_field!("Point Z", PointType, Dimension::XYZ);
1073 push_field!("LineString Z", LineStringType, Dimension::XYZ);
1074 push_field!("Polygon Z", PolygonType, Dimension::XYZ);
1075 push_field!("MultiPoint Z", MultiPointType, Dimension::XYZ);
1076 push_field!("MultiLineString Z", MultiLineStringType, Dimension::XYZ);
1077 push_field!("MultiPolygon Z", MultiPolygonType, Dimension::XYZ);
1078 push_field!(
1079 "GeometryCollection Z",
1080 GeometryCollectionType,
1081 Dimension::XYZ
1082 );
1083
1084 push_field!("Point M", PointType, Dimension::XYM);
1085 push_field!("LineString M", LineStringType, Dimension::XYM);
1086 push_field!("Polygon M", PolygonType, Dimension::XYM);
1087 push_field!("MultiPoint M", MultiPointType, Dimension::XYM);
1088 push_field!("MultiLineString M", MultiLineStringType, Dimension::XYM);
1089 push_field!("MultiPolygon M", MultiPolygonType, Dimension::XYM);
1090 push_field!(
1091 "GeometryCollection M",
1092 GeometryCollectionType,
1093 Dimension::XYM
1094 );
1095
1096 push_field!("Point ZM", PointType, Dimension::XYZM);
1097 push_field!("LineString ZM", LineStringType, Dimension::XYZM);
1098 push_field!("Polygon ZM", PolygonType, Dimension::XYZM);
1099 push_field!("MultiPoint ZM", MultiPointType, Dimension::XYZM);
1100 push_field!("MultiLineString ZM", MultiLineStringType, Dimension::XYZM);
1101 push_field!("MultiPolygon ZM", MultiPolygonType, Dimension::XYZM);
1102 push_field!(
1103 "GeometryCollection ZM",
1104 GeometryCollectionType,
1105 Dimension::XYZM
1106 );
1107
1108 let union_fields = UnionFields::try_new(type_ids, fields)
1109 .expect("type_ids and fields should have the same length");
1110 DataType::Union(union_fields, UnionMode::Dense)
1111 }
1112
1113 pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
1115 Field::new(name, self.data_type(), nullable).with_extension_type(self.clone())
1116 }
1117}
1118
1119impl ExtensionType for GeometryType {
1120 const NAME: &'static str = "geoarrow.geometry";
1121
1122 type Metadata = Arc<Metadata>;
1123
1124 fn metadata(&self) -> &Self::Metadata {
1125 self.metadata()
1126 }
1127
1128 fn serialize_metadata(&self) -> Option<String> {
1129 self.metadata.serialize()
1130 }
1131
1132 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1133 Ok(Arc::new(Metadata::deserialize(metadata)?))
1134 }
1135
1136 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1137 let coord_type = parse_geometry(data_type)?;
1138 if coord_type != self.coord_type {
1139 return Err(ArrowError::SchemaError(format!(
1140 "Expected coordinate type {:?}, but got {:?}",
1141 self.coord_type, coord_type
1142 )));
1143 }
1144 Ok(())
1145 }
1146
1147 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1148 let coord_type = parse_geometry(data_type)?;
1149 Ok(Self {
1150 coord_type,
1151 metadata,
1152 })
1153 }
1154}
1155
1156fn parse_geometry(data_type: &DataType) -> Result<CoordType, ArrowError> {
1157 if let DataType::Union(fields, _mode) = data_type {
1158 let mut coord_types: HashSet<CoordType> = HashSet::new();
1159
1160 fields.iter().try_for_each(|(type_id, field)| {
1162 macro_rules! impl_type_id {
1163 ($expected_dim:path, $parse_fn:ident) => {{
1164 let (ct, dim) = $parse_fn(field.data_type())?;
1165 coord_types.insert(ct);
1166 assert!(matches!(dim, $expected_dim));
1167 }};
1168 }
1169
1170 match type_id {
1171 1 => impl_type_id!(Dimension::XY, parse_point),
1172 2 => impl_type_id!(Dimension::XY, parse_linestring),
1173 3 => impl_type_id!(Dimension::XY, parse_polygon),
1174 4 => impl_type_id!(Dimension::XY, parse_multipoint),
1175 5 => impl_type_id!(Dimension::XY, parse_multilinestring),
1176 6 => impl_type_id!(Dimension::XY, parse_multipolygon),
1177 7 => impl_type_id!(Dimension::XY, parse_geometry_collection),
1178 11 => impl_type_id!(Dimension::XYZ, parse_point),
1179 12 => impl_type_id!(Dimension::XYZ, parse_linestring),
1180 13 => impl_type_id!(Dimension::XYZ, parse_polygon),
1181 14 => impl_type_id!(Dimension::XYZ, parse_multipoint),
1182 15 => impl_type_id!(Dimension::XYZ, parse_multilinestring),
1183 16 => impl_type_id!(Dimension::XYZ, parse_multipolygon),
1184 17 => impl_type_id!(Dimension::XYZ, parse_geometry_collection),
1185 21 => impl_type_id!(Dimension::XYM, parse_point),
1186 22 => impl_type_id!(Dimension::XYM, parse_linestring),
1187 23 => impl_type_id!(Dimension::XYM, parse_polygon),
1188 24 => impl_type_id!(Dimension::XYM, parse_multipoint),
1189 25 => impl_type_id!(Dimension::XYM, parse_multilinestring),
1190 26 => impl_type_id!(Dimension::XYM, parse_multipolygon),
1191 27 => impl_type_id!(Dimension::XYM, parse_geometry_collection),
1192 31 => impl_type_id!(Dimension::XYZM, parse_point),
1193 32 => impl_type_id!(Dimension::XYZM, parse_linestring),
1194 33 => impl_type_id!(Dimension::XYZM, parse_polygon),
1195 34 => impl_type_id!(Dimension::XYZM, parse_multipoint),
1196 35 => impl_type_id!(Dimension::XYZM, parse_multilinestring),
1197 36 => impl_type_id!(Dimension::XYZM, parse_multipolygon),
1198 37 => impl_type_id!(Dimension::XYZM, parse_geometry_collection),
1199 id => {
1200 return Err(ArrowError::SchemaError(format!(
1201 "Unexpected type id parsing geometry: {id}"
1202 )));
1203 }
1204 };
1205 Ok::<_, ArrowError>(())
1206 })?;
1207
1208 if coord_types.len() > 1 {
1209 return Err(ArrowError::SchemaError(
1210 "Multi coord types in union".to_string(),
1211 ));
1212 }
1213
1214 let coord_type = coord_types.drain().next().unwrap();
1215 Ok(coord_type)
1216 } else {
1217 Err(ArrowError::SchemaError("Expected union type".to_string()))
1218 }
1219}
1220
1221#[derive(Debug, Clone, PartialEq, Eq, Hash)]
1226pub struct BoxType {
1227 dim: Dimension,
1228 metadata: Arc<Metadata>,
1229}
1230
1231impl BoxType {
1232 pub fn new(dim: Dimension, metadata: Arc<Metadata>) -> Self {
1234 Self { dim, metadata }
1235 }
1236
1237 pub fn with_dimension(self, dim: Dimension) -> Self {
1239 Self { dim, ..self }
1240 }
1241
1242 pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1244 Self { metadata, ..self }
1245 }
1246
1247 pub fn dimension(&self) -> Dimension {
1249 self.dim
1250 }
1251
1252 pub fn metadata(&self) -> &Arc<Metadata> {
1254 &self.metadata
1255 }
1256
1257 pub fn data_type(&self) -> DataType {
1281 let values_fields = match self.dim {
1282 Dimension::XY => {
1283 vec![
1284 Field::new("xmin", DataType::Float64, false),
1285 Field::new("ymin", DataType::Float64, false),
1286 Field::new("xmax", DataType::Float64, false),
1287 Field::new("ymax", DataType::Float64, false),
1288 ]
1289 }
1290 Dimension::XYZ => {
1291 vec![
1292 Field::new("xmin", DataType::Float64, false),
1293 Field::new("ymin", DataType::Float64, false),
1294 Field::new("zmin", DataType::Float64, false),
1295 Field::new("xmax", DataType::Float64, false),
1296 Field::new("ymax", DataType::Float64, false),
1297 Field::new("zmax", DataType::Float64, false),
1298 ]
1299 }
1300 Dimension::XYM => {
1301 vec![
1302 Field::new("xmin", DataType::Float64, false),
1303 Field::new("ymin", DataType::Float64, false),
1304 Field::new("mmin", DataType::Float64, false),
1305 Field::new("xmax", DataType::Float64, false),
1306 Field::new("ymax", DataType::Float64, false),
1307 Field::new("mmax", DataType::Float64, false),
1308 ]
1309 }
1310 Dimension::XYZM => {
1311 vec![
1312 Field::new("xmin", DataType::Float64, false),
1313 Field::new("ymin", DataType::Float64, false),
1314 Field::new("zmin", DataType::Float64, false),
1315 Field::new("mmin", DataType::Float64, false),
1316 Field::new("xmax", DataType::Float64, false),
1317 Field::new("ymax", DataType::Float64, false),
1318 Field::new("zmax", DataType::Float64, false),
1319 Field::new("mmax", DataType::Float64, false),
1320 ]
1321 }
1322 };
1323 DataType::Struct(values_fields.into())
1324 }
1325
1326 pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
1328 Field::new(name, self.data_type(), nullable).with_extension_type(self.clone())
1329 }
1330}
1331
1332impl ExtensionType for BoxType {
1333 const NAME: &'static str = "geoarrow.box";
1334
1335 type Metadata = Arc<Metadata>;
1336
1337 fn metadata(&self) -> &Self::Metadata {
1338 self.metadata()
1339 }
1340
1341 fn serialize_metadata(&self) -> Option<String> {
1342 self.metadata.serialize()
1343 }
1344
1345 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1346 Ok(Arc::new(Metadata::deserialize(metadata)?))
1347 }
1348
1349 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1350 let dim = parse_box(data_type)?;
1351 if dim != self.dim {
1352 return Err(ArrowError::SchemaError(format!(
1353 "Expected dimension {:?}, but got {:?}",
1354 self.dim, dim
1355 )));
1356 }
1357 Ok(())
1358 }
1359
1360 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1361 let dim = parse_box(data_type)?;
1362 Ok(Self { dim, metadata })
1363 }
1364}
1365
1366fn parse_box(data_type: &DataType) -> Result<Dimension, ArrowError> {
1367 match data_type {
1368 DataType::Struct(struct_fields) => match struct_fields.len() {
1369 4 => Ok(Dimension::XY),
1370 6 => {
1371 let names: HashSet<&str> =
1372 struct_fields.iter().map(|f| f.name().as_str()).collect();
1373 if names.contains("mmin") && names.contains("mmax") {
1374 Ok(Dimension::XYM)
1375 } else if names.contains("zmin") && names.contains("zmax") {
1376 Ok(Dimension::XYZ)
1377 } else {
1378 Err(ArrowError::SchemaError(format!(
1379 "unexpected either mmin and mmax or zmin and zmax for struct with 6 fields. Got names: {names:?}",
1380 )))
1381 }
1382 }
1383 8 => Ok(Dimension::XYZM),
1384 num_fields => Err(ArrowError::SchemaError(format!(
1385 "unexpected number of struct fields: {num_fields}",
1386 ))),
1387 },
1388 dt => Err(ArrowError::SchemaError(format!(
1389 "unexpected data type parsing box: {dt:?}",
1390 ))),
1391 }
1392}
1393
/// Alias for [`BoxType`]; both names refer to the same type.
pub type RectType = BoxType;
1400
1401#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
1406pub struct WkbType {
1407 metadata: Arc<Metadata>,
1408}
1409
1410impl WkbType {
1411 pub fn new(metadata: Arc<Metadata>) -> Self {
1413 Self { metadata }
1414 }
1415
1416 pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1418 Self { metadata }
1419 }
1420
1421 pub fn metadata(&self) -> &Arc<Metadata> {
1423 &self.metadata
1424 }
1425}
1426
1427impl ExtensionType for WkbType {
1428 const NAME: &'static str = "geoarrow.wkb";
1429
1430 type Metadata = Arc<Metadata>;
1431
1432 fn metadata(&self) -> &Self::Metadata {
1433 self.metadata()
1434 }
1435
1436 fn serialize_metadata(&self) -> Option<String> {
1437 self.metadata.serialize()
1438 }
1439
1440 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1441 Ok(Arc::new(Metadata::deserialize(metadata)?))
1442 }
1443
1444 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1445 match data_type {
1446 DataType::Binary | DataType::LargeBinary | DataType::BinaryView => Ok(()),
1447 dt => Err(ArrowError::SchemaError(format!(
1448 "Unexpected data type {dt}"
1449 ))),
1450 }
1451 }
1452
1453 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1454 let wkb = Self { metadata };
1455 wkb.supports_data_type(data_type)?;
1456 Ok(wkb)
1457 }
1458}
1459
1460#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
1465pub struct WktType {
1466 metadata: Arc<Metadata>,
1467}
1468
1469impl WktType {
1470 pub fn new(metadata: Arc<Metadata>) -> Self {
1472 Self { metadata }
1473 }
1474
1475 pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1477 Self { metadata }
1478 }
1479
1480 pub fn metadata(&self) -> &Arc<Metadata> {
1482 &self.metadata
1483 }
1484}
1485
1486impl ExtensionType for WktType {
1487 const NAME: &'static str = "geoarrow.wkt";
1488
1489 type Metadata = Arc<Metadata>;
1490
1491 fn metadata(&self) -> &Self::Metadata {
1492 self.metadata()
1493 }
1494
1495 fn serialize_metadata(&self) -> Option<String> {
1496 self.metadata.serialize()
1497 }
1498
1499 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1500 Ok(Arc::new(Metadata::deserialize(metadata)?))
1501 }
1502
1503 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1504 match data_type {
1505 DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Ok(()),
1506 dt => Err(ArrowError::SchemaError(format!(
1507 "Unexpected data type {dt}"
1508 ))),
1509 }
1510 }
1511
1512 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1513 let wkb = Self { metadata };
1514 wkb.supports_data_type(data_type)?;
1515 Ok(wkb)
1516 }
1517}
1518
1519fn coord_type_to_data_type(coord_type: CoordType, dim: Dimension) -> DataType {
1520 match (coord_type, dim) {
1521 (CoordType::Interleaved, Dimension::XY) => INTERLEAVED_XY.clone(),
1522
1523 (CoordType::Interleaved, Dimension::XYZ) => INTERLEAVED_XYZ.clone(),
1524
1525 (CoordType::Interleaved, Dimension::XYM) => INTERLEAVED_XYM.clone(),
1526 (CoordType::Interleaved, Dimension::XYZM) => INTERLEAVED_XYZM.clone(),
1527 (CoordType::Separated, Dimension::XY) => SEPARATED_XY.clone(),
1528 (CoordType::Separated, Dimension::XYZ) => SEPARATED_XYZ.clone(),
1529 (CoordType::Separated, Dimension::XYM) => SEPARATED_XYM.clone(),
1530 (CoordType::Separated, Dimension::XYZM) => SEPARATED_XYZM.clone(),
1531 }
1532}
1533
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_schema::{DataType, Field};

    use super::*;
    use crate::crs::Crs;
    use crate::edges::Edges;

    /// Storage type used by the interleaved XY tests: a fixed-size list of two non-nullable
    /// `Float64` values whose child field is named `xy`.
    fn interleaved_xy_storage() -> DataType {
        let child = Field::new("xy", DataType::Float64, false);
        DataType::FixedSizeList(Arc::new(child), 2)
    }

    #[test]
    fn test_point_interleaved_xy() {
        let storage = interleaved_xy_storage();
        let point_type = PointType::try_new(&storage, Arc::new(Metadata::default())).unwrap();

        assert_eq!(point_type.coord_type(), CoordType::Interleaved);
        assert_eq!(point_type.dimension(), Dimension::XY);
        // Default metadata serializes to nothing.
        assert_eq!(point_type.serialize_metadata(), None);
    }

    #[test]
    fn test_point_separated_xyz() {
        let children: Vec<Field> = ["x", "y", "z"]
            .into_iter()
            .map(|axis| Field::new(axis, DataType::Float64, false))
            .collect();
        let storage = DataType::Struct(children.into());
        let point_type = PointType::try_new(&storage, Arc::new(Metadata::default())).unwrap();

        assert_eq!(point_type.coord_type(), CoordType::Separated);
        assert_eq!(point_type.dimension(), Dimension::XYZ);
        assert_eq!(point_type.serialize_metadata(), None);
    }

    #[test]
    fn test_point_metadata() {
        let storage = interleaved_xy_storage();
        let crs = Crs::from_authority_code("EPSG:4326".to_string());
        let metadata = Arc::new(Metadata::new(crs, Some(Edges::Spherical)));
        let point_type = PointType::try_new(&storage, metadata).unwrap();

        let expected = r#"{"crs":"EPSG:4326","crs_type":"authority_code","edges":"spherical"}"#;
        let serialized = point_type.serialize_metadata();
        assert_eq!(serialized.as_deref(), Some(expected));
    }

    // Smoke test: building the geometry-collection storage type must not panic; the
    // resulting type is only printed, not asserted on.
    #[test]
    fn geometry_data_type() {
        let typ = GeometryCollectionType::new(Dimension::XY, Default::default());
        dbg!(typ.data_type());
    }
}