Skip to main content

geoarrow_schema/
type.rs

1use std::collections::HashSet;
2use std::sync::{Arc, LazyLock};
3
4use arrow_schema::extension::ExtensionType;
5use arrow_schema::{ArrowError, DataType, Field, UnionFields, UnionMode};
6
7use crate::error::GeoArrowError;
8use crate::metadata::Metadata;
9use crate::{CoordType, Dimension};
10
/// Generate a geometry type struct holding a [`CoordType`], a [`Dimension`] and shared
/// [`Metadata`], together with its constructor, builder-style setters and accessors.
///
/// Attributes (including doc comments) written before the struct name are forwarded to the
/// generated struct. The generated `to_field` calls `self.data_type()`, so every generated
/// type needs a `data_type` method and an `ExtensionType` impl defined elsewhere.
macro_rules! define_basic_type {
    (
        $(#[$($struct_attr:meta)*])*
        $struct_name:ident
    ) => {
        $(#[$($struct_attr)*])*
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        pub struct $struct_name {
            coord_type: CoordType,
            dim: Dimension,
            metadata: Arc<Metadata>,
        }

        impl $struct_name {
            /// Build a new type from a dimension and metadata.
            ///
            /// The coordinate type is initialized to the default [`CoordType`].
            pub fn new(dim: Dimension, metadata: Arc<Metadata>) -> Self {
                let coord_type = CoordType::default();
                Self {
                    coord_type,
                    dim,
                    metadata,
                }
            }

            /// Return this type with its [`CoordType`] replaced.
            pub fn with_coord_type(mut self, coord_type: CoordType) -> Self {
                self.coord_type = coord_type;
                self
            }

            /// Return this type with its [`Dimension`] replaced.
            pub fn with_dimension(mut self, dim: Dimension) -> Self {
                self.dim = dim;
                self
            }

            /// Return this type with its [`Metadata`] replaced.
            pub fn with_metadata(mut self, metadata: Arc<Metadata>) -> Self {
                self.metadata = metadata;
                self
            }

            /// The [`CoordType`] of this type.
            pub fn coord_type(&self) -> CoordType {
                self.coord_type
            }

            /// The [`Dimension`] of this type.
            pub fn dimension(&self) -> Dimension {
                self.dim
            }

            /// The shared [`Metadata`] of this type.
            pub fn metadata(&self) -> &Arc<Metadata> {
                &self.metadata
            }

            /// Build a [`Field`] with this type's data type and attach this type to it as an
            /// extension type.
            pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
                let plain_field = Field::new(name, self.data_type(), nullable);
                plain_field.with_extension_type(self.clone())
            }

            /// Consume this type and return its coordinate type, dimension and metadata.
            pub fn into_inner(self) -> (CoordType, Dimension, Arc<Metadata>) {
                let Self {
                    coord_type,
                    dim,
                    metadata,
                } = self;
                (coord_type, dim, metadata)
            }
        }
    };
}
76
77define_basic_type!(
78    /// A GeoArrow Point type.
79    ///
80    /// Refer to the [GeoArrow
81    /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#point).
82    PointType
83);
84define_basic_type!(
85    /// A GeoArrow LineString type.
86    ///
87    /// Refer to the [GeoArrow
88    /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#linestring).
89    LineStringType
90);
91define_basic_type!(
92    /// A GeoArrow Polygon type.
93    ///
94    /// Refer to the [GeoArrow
95    /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#polygon).
96    PolygonType
97);
98define_basic_type!(
99    /// A GeoArrow MultiPoint type.
100    ///
101    /// Refer to the [GeoArrow
102    /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#multipoint).
103    MultiPointType
104);
105define_basic_type!(
106    /// A GeoArrow MultiLineString type.
107    ///
108    /// Refer to the [GeoArrow
109    /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#multilinestring).
110    MultiLineStringType
111);
112define_basic_type!(
113    /// A GeoArrow MultiPolygon type.
114    ///
115    /// Refer to the [GeoArrow
116    /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#multipolygon).
117    MultiPolygonType
118);
119define_basic_type!(
120    /// A GeoArrow GeometryCollection type.
121    ///
122    /// Refer to the [GeoArrow
123    /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#geometrycollection).
124    GeometryCollectionType
125);
126
127impl PointType {
128    /// Convert to the corresponding [`DataType`].
129    ///
130    /// ```
131    /// use arrow_schema::{DataType, Field};
132    /// use geoarrow_schema::{CoordType, Dimension, PointType};
133    ///
134    /// let geom_type = PointType::new(Dimension::XY, Default::default()).with_coord_type(CoordType::Interleaved);
135    /// let expected_type =
136    ///     DataType::FixedSizeList(Field::new("xy", DataType::Float64, false).into(), 2);
137    /// assert_eq!(geom_type.data_type(), expected_type);
138    /// ```
139    pub fn data_type(&self) -> DataType {
140        coord_type_to_data_type(self.coord_type, self.dim)
141    }
142}
143
144impl ExtensionType for PointType {
145    const NAME: &'static str = "geoarrow.point";
146
147    type Metadata = Arc<Metadata>;
148
149    fn metadata(&self) -> &Self::Metadata {
150        self.metadata()
151    }
152
153    fn serialize_metadata(&self) -> Option<String> {
154        self.metadata.serialize()
155    }
156
157    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
158        Ok(Arc::new(Metadata::deserialize(metadata)?))
159    }
160
161    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
162        let (coord_type, dim) = parse_point(data_type)?;
163        if coord_type != self.coord_type {
164            return Err(ArrowError::SchemaError(format!(
165                "Expected coordinate type {:?}, but got {:?}",
166                self.coord_type, coord_type
167            )));
168        }
169        if dim != self.dim {
170            return Err(ArrowError::SchemaError(format!(
171                "Expected dimension {:?}, but got {:?}",
172                self.dim, dim
173            )));
174        }
175        Ok(())
176    }
177
178    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
179        let (coord_type, dim) = parse_point(data_type)?;
180        Ok(Self {
181            coord_type,
182            dim,
183            metadata,
184        })
185    }
186}
187
188fn parse_point(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
189    match data_type {
190        DataType::FixedSizeList(inner_field, list_size) => {
191            let dim_parsed_from_field = Dimension::from_interleaved_field(inner_field)?;
192            if dim_parsed_from_field.size() != *list_size as usize {
193                Err(GeoArrowError::InvalidGeoArrow(format!(
194                    "Field metadata suggests list of size {}, but list size is {}",
195                    dim_parsed_from_field.size(),
196                    list_size
197                ))
198                .into())
199            } else {
200                Ok((CoordType::Interleaved, dim_parsed_from_field))
201            }
202        }
203        DataType::Struct(struct_fields) => Ok((
204            CoordType::Separated,
205            Dimension::from_separated_field(struct_fields)?,
206        )),
207        dt => Err(ArrowError::SchemaError(format!(
208            "Unexpected data type {dt}"
209        ))),
210    }
211}
212
213impl LineStringType {
214    /// Convert to the corresponding [`DataType`].
215    ///
216    /// ```
217    /// use arrow_schema::{DataType, Field};
218    /// use geoarrow_schema::{Dimension, LineStringType};
219    ///
220    /// let geom_type = LineStringType::new(Dimension::XY, Default::default());
221    /// let expected_coord_type = DataType::Struct(
222    ///     vec![
223    ///         Field::new("x", DataType::Float64, false),
224    ///         Field::new("y", DataType::Float64, false),
225    ///     ]
226    ///     .into(),
227    /// );
228    /// let expected_type = DataType::List(Field::new("vertices", expected_coord_type, false).into());
229    /// assert_eq!(geom_type.data_type(), expected_type);
230    /// ```
231    pub fn data_type(&self) -> DataType {
232        let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
233        let vertices_field = Field::new("vertices", coords_type, false).into();
234        DataType::List(vertices_field)
235    }
236}
237
238impl ExtensionType for LineStringType {
239    const NAME: &'static str = "geoarrow.linestring";
240
241    type Metadata = Arc<Metadata>;
242
243    fn metadata(&self) -> &Self::Metadata {
244        self.metadata()
245    }
246
247    fn serialize_metadata(&self) -> Option<String> {
248        self.metadata.serialize()
249    }
250
251    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
252        Ok(Arc::new(Metadata::deserialize(metadata)?))
253    }
254
255    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
256        let (coord_type, dim) = parse_linestring(data_type)?;
257        if coord_type != self.coord_type {
258            return Err(ArrowError::SchemaError(format!(
259                "Expected coordinate type {:?}, but got {:?}",
260                self.coord_type, coord_type
261            )));
262        }
263        if dim != self.dim {
264            return Err(ArrowError::SchemaError(format!(
265                "Expected dimension {:?}, but got {:?}",
266                self.dim, dim
267            )));
268        }
269        Ok(())
270    }
271
272    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
273        let (coord_type, dim) = parse_linestring(data_type)?;
274        Ok(Self {
275            coord_type,
276            dim,
277            metadata,
278        })
279    }
280}
281
282fn parse_linestring(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
283    match data_type {
284        DataType::List(inner_field) | DataType::LargeList(inner_field) => {
285            parse_point(inner_field.data_type())
286        }
287        dt => Err(ArrowError::SchemaError(format!(
288            "Unexpected data type {dt}"
289        ))),
290    }
291}
292
293impl PolygonType {
294    /// Convert to the corresponding [`DataType`].
295    ///
296    /// ```
297    /// use arrow_schema::{DataType, Field};
298    /// use geoarrow_schema::{Dimension, PolygonType};
299    ///
300    /// let geom_type = PolygonType::new(Dimension::XYZ, Default::default());
301    ///
302    /// let expected_coord_type = DataType::Struct(
303    ///     vec![
304    ///         Field::new("x", DataType::Float64, false),
305    ///         Field::new("y", DataType::Float64, false),
306    ///         Field::new("z", DataType::Float64, false),
307    ///     ]
308    ///     .into(),
309    /// );
310    /// let vertices_field = Field::new("vertices", expected_coord_type, false);
311    /// let rings_field = Field::new_list("rings", vertices_field, false);
312    /// let expected_type = DataType::List(rings_field.into());
313    /// assert_eq!(geom_type.data_type(), expected_type);
314    /// ```
315    pub fn data_type(&self) -> DataType {
316        let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
317        let vertices_field = Field::new("vertices", coords_type, false);
318        let rings_field = Field::new_list("rings", vertices_field, false).into();
319        DataType::List(rings_field)
320    }
321}
322
323impl ExtensionType for PolygonType {
324    const NAME: &'static str = "geoarrow.polygon";
325
326    type Metadata = Arc<Metadata>;
327
328    fn metadata(&self) -> &Self::Metadata {
329        self.metadata()
330    }
331
332    fn serialize_metadata(&self) -> Option<String> {
333        self.metadata.serialize()
334    }
335
336    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
337        Ok(Arc::new(Metadata::deserialize(metadata)?))
338    }
339
340    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
341        let (coord_type, dim) = parse_polygon(data_type)?;
342        if coord_type != self.coord_type {
343            return Err(ArrowError::SchemaError(format!(
344                "Expected coordinate type {:?}, but got {:?}",
345                self.coord_type, coord_type
346            )));
347        }
348        if dim != self.dim {
349            return Err(ArrowError::SchemaError(format!(
350                "Expected dimension {:?}, but got {:?}",
351                self.dim, dim
352            )));
353        }
354        Ok(())
355    }
356
357    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
358        let (coord_type, dim) = parse_polygon(data_type)?;
359        Ok(Self {
360            coord_type,
361            dim,
362            metadata,
363        })
364    }
365}
366
367fn parse_polygon(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
368    match data_type {
369        DataType::List(inner1) => match inner1.data_type() {
370            DataType::List(inner2) => parse_point(inner2.data_type()),
371            dt => Err(ArrowError::SchemaError(format!(
372                "Unexpected inner polygon data type: {dt}"
373            ))),
374        },
375        DataType::LargeList(inner1) => match inner1.data_type() {
376            DataType::LargeList(inner2) => parse_point(inner2.data_type()),
377            dt => Err(ArrowError::SchemaError(format!(
378                "Unexpected inner polygon data type: {dt}"
379            ))),
380        },
381        dt => Err(ArrowError::SchemaError(format!(
382            "Unexpected root data type parsing polygon {dt}"
383        ))),
384    }
385}
386
387impl MultiPointType {
388    /// Convert to the corresponding [`DataType`].
389    ///
390    /// ```
391    /// use arrow_schema::{DataType, Field};
392    /// use geoarrow_schema::{Dimension, MultiPointType};
393    ///
394    /// let geom_type = MultiPointType::new(Dimension::XYZ, Default::default());
395    ///
396    /// let expected_coord_type = DataType::Struct(
397    ///     vec![
398    ///         Field::new("x", DataType::Float64, false),
399    ///         Field::new("y", DataType::Float64, false),
400    ///         Field::new("z", DataType::Float64, false),
401    ///     ]
402    ///     .into(),
403    /// );
404    /// let vertices_field = Field::new("points", expected_coord_type, false);
405    /// let expected_type = DataType::List(vertices_field.into());
406    /// assert_eq!(geom_type.data_type(), expected_type);
407    /// ```
408    pub fn data_type(&self) -> DataType {
409        let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
410        let vertices_field = Field::new("points", coords_type, false).into();
411        DataType::List(vertices_field)
412    }
413}
414
415impl ExtensionType for MultiPointType {
416    const NAME: &'static str = "geoarrow.multipoint";
417
418    type Metadata = Arc<Metadata>;
419
420    fn metadata(&self) -> &Self::Metadata {
421        self.metadata()
422    }
423
424    fn serialize_metadata(&self) -> Option<String> {
425        self.metadata.serialize()
426    }
427
428    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
429        Ok(Arc::new(Metadata::deserialize(metadata)?))
430    }
431
432    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
433        let (coord_type, dim) = parse_multipoint(data_type)?;
434        if coord_type != self.coord_type {
435            return Err(ArrowError::SchemaError(format!(
436                "Expected coordinate type {:?}, but got {:?}",
437                self.coord_type, coord_type
438            )));
439        }
440        if dim != self.dim {
441            return Err(ArrowError::SchemaError(format!(
442                "Expected dimension {:?}, but got {:?}",
443                self.dim, dim
444            )));
445        }
446        Ok(())
447    }
448
449    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
450        let (coord_type, dim) = parse_multipoint(data_type)?;
451        Ok(Self {
452            coord_type,
453            dim,
454            metadata,
455        })
456    }
457}
458
459fn parse_multipoint(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
460    match data_type {
461        DataType::List(inner_field) => parse_point(inner_field.data_type()),
462        DataType::LargeList(inner_field) => parse_point(inner_field.data_type()),
463        dt => Err(ArrowError::SchemaError(format!(
464            "Unexpected data type {dt}"
465        ))),
466    }
467}
468
469impl MultiLineStringType {
470    /// Convert to the corresponding [`DataType`].
471    ///
472    /// ```
473    /// use arrow_schema::{DataType, Field};
474    /// use geoarrow_schema::{Dimension, MultiLineStringType};
475    ///
476    /// let geom_type =
477    ///     MultiLineStringType::new(Dimension::XYZ, Default::default());
478    ///
479    /// let expected_coord_type = DataType::Struct(
480    ///     vec![
481    ///         Field::new("x", DataType::Float64, false),
482    ///         Field::new("y", DataType::Float64, false),
483    ///         Field::new("z", DataType::Float64, false),
484    ///     ]
485    ///     .into(),
486    /// );
487    /// let vertices_field = Field::new("vertices", expected_coord_type, false);
488    /// let linestrings_field = Field::new_list("linestrings", vertices_field, false);
489    /// let expected_type = DataType::List(linestrings_field.into());
490    /// assert_eq!(geom_type.data_type(), expected_type);
491    /// ```
492    pub fn data_type(&self) -> DataType {
493        let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
494        let vertices_field = Field::new("vertices", coords_type, false);
495        let linestrings_field = Field::new_list("linestrings", vertices_field, false).into();
496        DataType::List(linestrings_field)
497    }
498}
499
500impl ExtensionType for MultiLineStringType {
501    const NAME: &'static str = "geoarrow.multilinestring";
502
503    type Metadata = Arc<Metadata>;
504
505    fn metadata(&self) -> &Self::Metadata {
506        self.metadata()
507    }
508
509    fn serialize_metadata(&self) -> Option<String> {
510        self.metadata.serialize()
511    }
512
513    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
514        Ok(Arc::new(Metadata::deserialize(metadata)?))
515    }
516
517    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
518        let (coord_type, dim) = parse_multilinestring(data_type)?;
519        if coord_type != self.coord_type {
520            return Err(ArrowError::SchemaError(format!(
521                "Expected coordinate type {:?}, but got {:?}",
522                self.coord_type, coord_type
523            )));
524        }
525        if dim != self.dim {
526            return Err(ArrowError::SchemaError(format!(
527                "Expected dimension {:?}, but got {:?}",
528                self.dim, dim
529            )));
530        }
531        Ok(())
532    }
533
534    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
535        let (coord_type, dim) = parse_multilinestring(data_type)?;
536        Ok(Self {
537            coord_type,
538            dim,
539            metadata,
540        })
541    }
542}
543
544fn parse_multilinestring(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
545    match data_type {
546        DataType::List(inner1) => match inner1.data_type() {
547            DataType::List(inner2) => parse_point(inner2.data_type()),
548            dt => Err(ArrowError::SchemaError(format!(
549                "Unexpected inner multilinestring data type: {dt}"
550            ))),
551        },
552        DataType::LargeList(inner1) => match inner1.data_type() {
553            DataType::LargeList(inner2) => parse_point(inner2.data_type()),
554            dt => Err(ArrowError::SchemaError(format!(
555                "Unexpected inner multilinestring data type: {dt}"
556            ))),
557        },
558        dt => Err(ArrowError::SchemaError(format!(
559            "Unexpected data type parsing multilinestring: {dt}"
560        ))),
561    }
562}
563
564impl MultiPolygonType {
565    /// Convert to the corresponding [`DataType`].
566    ///
567    /// ```
568    /// use arrow_schema::{DataType, Field};
569    /// use geoarrow_schema::{Dimension, MultiPolygonType};
570    ///
571    /// let geom_type = MultiPolygonType::new(Dimension::XYM, Default::default());
572    ///
573    /// let expected_coord_type = DataType::Struct(
574    ///     vec![
575    ///         Field::new("x", DataType::Float64, false),
576    ///         Field::new("y", DataType::Float64, false),
577    ///         Field::new("m", DataType::Float64, false),
578    ///     ]
579    ///     .into(),
580    /// );
581    /// let vertices_field = Field::new("vertices", expected_coord_type, false);
582    /// let rings_field = Field::new_list("rings", vertices_field, false);
583    /// let polygons_field = Field::new_list("polygons", rings_field, false);
584    /// let expected_type = DataType::List(polygons_field.into());
585    /// assert_eq!(geom_type.data_type(), expected_type);
586    /// ```
587    pub fn data_type(&self) -> DataType {
588        let coords_type = coord_type_to_data_type(self.coord_type, self.dim);
589        let vertices_field = Field::new("vertices", coords_type, false);
590        let rings_field = Field::new_list("rings", vertices_field, false);
591        let polygons_field = Field::new_list("polygons", rings_field, false).into();
592        DataType::List(polygons_field)
593    }
594}
595
596impl ExtensionType for MultiPolygonType {
597    const NAME: &'static str = "geoarrow.multipolygon";
598
599    type Metadata = Arc<Metadata>;
600
601    fn metadata(&self) -> &Self::Metadata {
602        self.metadata()
603    }
604
605    fn serialize_metadata(&self) -> Option<String> {
606        self.metadata.serialize()
607    }
608
609    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
610        Ok(Arc::new(Metadata::deserialize(metadata)?))
611    }
612
613    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
614        let (coord_type, dim) = parse_multipolygon(data_type)?;
615        if coord_type != self.coord_type {
616            return Err(ArrowError::SchemaError(format!(
617                "Expected coordinate type {:?}, but got {:?}",
618                self.coord_type, coord_type
619            )));
620        }
621        if dim != self.dim {
622            return Err(ArrowError::SchemaError(format!(
623                "Expected dimension {:?}, but got {:?}",
624                self.dim, dim
625            )));
626        }
627        Ok(())
628    }
629
630    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
631        let (coord_type, dim) = parse_multipolygon(data_type)?;
632        Ok(Self {
633            coord_type,
634            dim,
635            metadata,
636        })
637    }
638}
639
640fn parse_multipolygon(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
641    match data_type {
642        DataType::List(inner1) => match inner1.data_type() {
643            DataType::List(inner2) => match inner2.data_type() {
644                DataType::List(inner3) => parse_point(inner3.data_type()),
645                dt => Err(ArrowError::SchemaError(format!(
646                    "Unexpected inner2 multipolygon data type: {dt}"
647                ))),
648            },
649            dt => Err(ArrowError::SchemaError(format!(
650                "Unexpected inner1 multipolygon data type: {dt}"
651            ))),
652        },
653        DataType::LargeList(inner1) => match inner1.data_type() {
654            DataType::LargeList(inner2) => match inner2.data_type() {
655                DataType::LargeList(inner3) => parse_point(inner3.data_type()),
656                dt => Err(ArrowError::SchemaError(format!(
657                    "Unexpected inner2 multipolygon data type: {dt}"
658                ))),
659            },
660            dt => Err(ArrowError::SchemaError(format!(
661                "Unexpected inner1 multipolygon data type: {dt}"
662            ))),
663        },
664        dt => Err(ArrowError::SchemaError(format!(
665            "Unexpected data type {dt}"
666        ))),
667    }
668}
669
670impl GeometryCollectionType {
671    /// Convert to the corresponding [`DataType`].
672    ///
673    /// ```
674    /// use std::sync::Arc;
675    ///
676    /// use arrow_schema::{DataType, Field, UnionFields, UnionMode};
677    /// use geoarrow_schema::{
678    ///     Dimension, GeometryCollectionType, LineStringType, Metadata, MultiLineStringType,
679    ///     MultiPointType, MultiPolygonType, PointType, PolygonType,
680    /// };
681    ///
682    /// let dim = Dimension::XY;
683    /// let metadata = Arc::new(Metadata::default());
684    /// let geom_type = GeometryCollectionType::new(dim, metadata.clone());
685    ///
686    /// let fields = vec![
687    ///     Field::new(
688    ///         "Point",
689    ///         PointType::new(dim, metadata.clone()).data_type(),
690    ///         true,
691    ///     ),
692    ///     Field::new(
693    ///         "LineString",
694    ///         LineStringType::new(dim, metadata.clone()).data_type(),
695    ///         true,
696    ///     ),
697    ///     Field::new(
698    ///         "Polygon",
699    ///         PolygonType::new(dim, metadata.clone()).data_type(),
700    ///         true,
701    ///     ),
702    ///     Field::new(
703    ///         "MultiPoint",
704    ///         MultiPointType::new(dim, metadata.clone()).data_type(),
705    ///         true,
706    ///     ),
707    ///     Field::new(
708    ///         "MultiLineString",
709    ///         MultiLineStringType::new(dim, metadata.clone()).data_type(),
710    ///         true,
711    ///     ),
712    ///     Field::new(
713    ///         "MultiPolygon",
714    ///         MultiPolygonType::new(dim, metadata.clone()).data_type(),
715    ///         true,
716    ///     ),
717    /// ];
718    /// let type_ids = vec![1, 2, 3, 4, 5, 6];
719    ///
720    /// let union_fields = UnionFields::new(type_ids, fields);
721    /// let union_data_type = DataType::Union(union_fields, UnionMode::Dense);
722    ///
723    /// let geometries_field = Field::new("geometries", union_data_type, false).into();
724    /// let expected_type = DataType::List(geometries_field);
725    ///
726    /// assert_eq!(geom_type.data_type(), expected_type);
727    /// ```
728    pub fn data_type(&self) -> DataType {
729        let geometries_field = Field::new(
730            "geometries",
731            mixed_data_type(self.coord_type, self.dim),
732            false,
733        )
734        .into();
735        DataType::List(geometries_field)
736    }
737}
738
739fn mixed_data_type(coord_type: CoordType, dim: Dimension) -> DataType {
740    let mut fields = vec![];
741    let mut type_ids = vec![];
742
743    match dim {
744        Dimension::XY => type_ids.extend([1, 2, 3, 4, 5, 6]),
745        Dimension::XYZ => type_ids.extend([11, 12, 13, 14, 15, 16]),
746        Dimension::XYM => type_ids.extend([21, 22, 23, 24, 25, 26]),
747        Dimension::XYZM => type_ids.extend([31, 32, 33, 34, 35, 36]),
748    }
749
750    // Note: we manually construct the fields because these fields shouldn't have their own
751    // GeoArrow extension metadata
752    macro_rules! push_field {
753        ($field_name:literal, $geom_type:ident) => {{
754            fields.push(Field::new(
755                $field_name,
756                $geom_type {
757                    coord_type,
758                    dim,
759                    metadata: Metadata::default().into(),
760                }
761                .data_type(),
762                true,
763            ));
764        }};
765    }
766
767    match dim {
768        Dimension::XY => {
769            push_field!("Point", PointType);
770            push_field!("LineString", LineStringType);
771            push_field!("Polygon", PolygonType);
772            push_field!("MultiPoint", MultiPointType);
773            push_field!("MultiLineString", MultiLineStringType);
774            push_field!("MultiPolygon", MultiPolygonType);
775        }
776        Dimension::XYZ => {
777            push_field!("Point Z", PointType);
778            push_field!("LineString Z", LineStringType);
779            push_field!("Polygon Z", PolygonType);
780            push_field!("MultiPoint Z", MultiPointType);
781            push_field!("MultiLineString Z", MultiLineStringType);
782            push_field!("MultiPolygon Z", MultiPolygonType);
783        }
784        Dimension::XYM => {
785            push_field!("Point M", PointType);
786            push_field!("LineString M", LineStringType);
787            push_field!("Polygon M", PolygonType);
788            push_field!("MultiPoint M", MultiPointType);
789            push_field!("MultiLineString M", MultiLineStringType);
790            push_field!("MultiPolygon M", MultiPolygonType);
791        }
792        Dimension::XYZM => {
793            push_field!("Point ZM", PointType);
794            push_field!("LineString ZM", LineStringType);
795            push_field!("Polygon ZM", PolygonType);
796            push_field!("MultiPoint ZM", MultiPointType);
797            push_field!("MultiLineString ZM", MultiLineStringType);
798            push_field!("MultiPolygon ZM", MultiPolygonType);
799        }
800    }
801
802    let union_fields = UnionFields::try_new(type_ids, fields)
803        .expect("type_ids and fields should have the same length");
804    DataType::Union(union_fields, UnionMode::Dense)
805}
806
807impl ExtensionType for GeometryCollectionType {
808    const NAME: &'static str = "geoarrow.geometrycollection";
809
810    type Metadata = Arc<Metadata>;
811
812    fn metadata(&self) -> &Self::Metadata {
813        self.metadata()
814    }
815
816    fn serialize_metadata(&self) -> Option<String> {
817        self.metadata.serialize()
818    }
819
820    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
821        Ok(Arc::new(Metadata::deserialize(metadata)?))
822    }
823
824    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
825        let (coord_type, dim) = parse_geometry_collection(data_type)?;
826        if coord_type != self.coord_type {
827            return Err(ArrowError::SchemaError(format!(
828                "Expected coordinate type {:?}, but got {:?}",
829                self.coord_type, coord_type
830            )));
831        }
832        if dim != self.dim {
833            return Err(ArrowError::SchemaError(format!(
834                "Expected dimension {:?}, but got {:?}",
835                self.dim, dim
836            )));
837        }
838        Ok(())
839    }
840
841    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
842        let (coord_type, dim) = parse_geometry_collection(data_type)?;
843        Ok(Self {
844            coord_type,
845            dim,
846            metadata,
847        })
848    }
849}
850
851fn parse_mixed(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
852    match data_type {
853        DataType::Union(fields, _) => {
854            let mut coord_types: HashSet<CoordType> = HashSet::new();
855            let mut dimensions: HashSet<Dimension> = HashSet::new();
856
857            // Validate that all fields of the union have the same coordinate type and dimension
858            fields.iter().try_for_each(|(type_id, field)| {
859                macro_rules! impl_type_id {
860                    ($expected_dim:path, $parse_fn:ident) => {{
861                        let (ct, dim) = $parse_fn(field.data_type())?;
862                        coord_types.insert(ct);
863                        assert!(matches!(dim, $expected_dim));
864                        dimensions.insert(dim);
865                    }};
866                }
867
868                match type_id {
869                    1 => impl_type_id!(Dimension::XY, parse_point),
870                    2 => impl_type_id!(Dimension::XY, parse_linestring),
871                    3 => impl_type_id!(Dimension::XY, parse_polygon),
872                    4 => impl_type_id!(Dimension::XY, parse_multipoint),
873                    5 => impl_type_id!(Dimension::XY, parse_multilinestring),
874                    6 => impl_type_id!(Dimension::XY, parse_multipolygon),
875                    11 => impl_type_id!(Dimension::XYZ, parse_point),
876                    12 => impl_type_id!(Dimension::XYZ, parse_linestring),
877                    13 => impl_type_id!(Dimension::XYZ, parse_polygon),
878                    14 => impl_type_id!(Dimension::XYZ, parse_multipoint),
879                    15 => impl_type_id!(Dimension::XYZ, parse_multilinestring),
880                    16 => impl_type_id!(Dimension::XYZ, parse_multipolygon),
881                    21 => impl_type_id!(Dimension::XYM, parse_point),
882                    22 => impl_type_id!(Dimension::XYM, parse_linestring),
883                    23 => impl_type_id!(Dimension::XYM, parse_polygon),
884                    24 => impl_type_id!(Dimension::XYM, parse_multipoint),
885                    25 => impl_type_id!(Dimension::XYM, parse_multilinestring),
886                    26 => impl_type_id!(Dimension::XYM, parse_multipolygon),
887                    31 => impl_type_id!(Dimension::XYZM, parse_point),
888                    32 => impl_type_id!(Dimension::XYZM, parse_linestring),
889                    33 => impl_type_id!(Dimension::XYZM, parse_polygon),
890                    34 => impl_type_id!(Dimension::XYZM, parse_multipoint),
891                    35 => impl_type_id!(Dimension::XYZM, parse_multilinestring),
892                    36 => impl_type_id!(Dimension::XYZM, parse_multipolygon),
893                    id => {
894                        return Err(ArrowError::SchemaError(format!(
895                            "Unexpected type id parsing mixed: {id}"
896                        )));
897                    }
898                };
899                Ok::<_, ArrowError>(())
900            })?;
901
902            if coord_types.len() > 1 {
903                return Err(ArrowError::SchemaError(
904                    "Multi coord types in union".to_string(),
905                ));
906            }
907            if dimensions.len() > 1 {
908                return Err(ArrowError::SchemaError(
909                    "Multi dimensions types in union".to_string(),
910                ));
911            }
912
913            let coord_type = coord_types.drain().next().unwrap();
914            let dimension = dimensions.drain().next().unwrap();
915            Ok((coord_type, dimension))
916        }
917        dt => Err(ArrowError::SchemaError(format!(
918            "Unexpected mixed data type: {dt}"
919        ))),
920    }
921}
922
923fn parse_geometry_collection(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> {
924    // We need to parse the _inner_ type of the geometry collection as a union so that we can check
925    // what coordinate type it's using.
926    match data_type {
927        DataType::List(inner_field) | DataType::LargeList(inner_field) => {
928            parse_mixed(inner_field.data_type())
929        }
930        dt => Err(ArrowError::SchemaError(format!(
931            "Unexpected geometry collection data type: {dt}"
932        ))),
933    }
934}
935
936static INTERLEAVED_XY: LazyLock<DataType> = LazyLock::new(|| {
937    let values_field = Field::new("xy", DataType::Float64, false);
938    DataType::FixedSizeList(Arc::new(values_field), 2)
939});
940
941static INTERLEAVED_XYZ: LazyLock<DataType> = LazyLock::new(|| {
942    let values_field = Field::new("xyz", DataType::Float64, false);
943    DataType::FixedSizeList(Arc::new(values_field), 3)
944});
945
946static INTERLEAVED_XYM: LazyLock<DataType> = LazyLock::new(|| {
947    let values_field = Field::new("xym", DataType::Float64, false);
948    DataType::FixedSizeList(Arc::new(values_field), 3)
949});
950
951static INTERLEAVED_XYZM: LazyLock<DataType> = LazyLock::new(|| {
952    let values_field = Field::new("xyzm", DataType::Float64, false);
953    DataType::FixedSizeList(Arc::new(values_field), 4)
954});
955
956static SEPARATED_XY: LazyLock<DataType> = LazyLock::new(|| {
957    DataType::Struct(
958        vec![
959            Field::new("x", DataType::Float64, false),
960            Field::new("y", DataType::Float64, false),
961        ]
962        .into(),
963    )
964});
965
966static SEPARATED_XYZ: LazyLock<DataType> = LazyLock::new(|| {
967    DataType::Struct(
968        vec![
969            Field::new("x", DataType::Float64, false),
970            Field::new("y", DataType::Float64, false),
971            Field::new("z", DataType::Float64, false),
972        ]
973        .into(),
974    )
975});
976
977static SEPARATED_XYM: LazyLock<DataType> = LazyLock::new(|| {
978    DataType::Struct(
979        vec![
980            Field::new("x", DataType::Float64, false),
981            Field::new("y", DataType::Float64, false),
982            Field::new("m", DataType::Float64, false),
983        ]
984        .into(),
985    )
986});
987
988static SEPARATED_XYZM: LazyLock<DataType> = LazyLock::new(|| {
989    DataType::Struct(
990        vec![
991            Field::new("x", DataType::Float64, false),
992            Field::new("y", DataType::Float64, false),
993            Field::new("z", DataType::Float64, false),
994            Field::new("m", DataType::Float64, false),
995        ]
996        .into(),
997    )
998});
999
1000/// A GeoArrow Geometry type.
1001///
1002/// Refer to the [GeoArrow
1003/// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#geometry).
1004#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
1005pub struct GeometryType {
1006    coord_type: CoordType,
1007    metadata: Arc<Metadata>,
1008}
1009
1010impl GeometryType {
1011    /// Construct a new type from parts.
1012    pub fn new(metadata: Arc<Metadata>) -> Self {
1013        Self {
1014            coord_type: Default::default(),
1015            metadata,
1016        }
1017    }
1018
1019    /// Change the underlying [`CoordType`]
1020    pub fn with_coord_type(self, coord_type: CoordType) -> Self {
1021        Self { coord_type, ..self }
1022    }
1023
1024    /// Change the underlying [`Metadata`]
1025    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1026        Self { metadata, ..self }
1027    }
1028
1029    /// Retrieve the underlying [`CoordType`]
1030    pub fn coord_type(&self) -> CoordType {
1031        self.coord_type
1032    }
1033
1034    /// Retrieve the underlying [`Metadata`]
1035    pub fn metadata(&self) -> &Arc<Metadata> {
1036        &self.metadata
1037    }
1038
1039    /// Convert to the corresponding [`DataType`].
1040    pub fn data_type(&self) -> DataType {
1041        let mut fields = vec![];
1042        let type_ids = vec![
1043            1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 31, 32,
1044            33, 34, 35, 36, 37,
1045        ];
1046
1047        // Note: we manually construct the fields because these fields shouldn't have their own
1048        // GeoArrow extension metadata
1049        macro_rules! push_field {
1050            ($field_name:literal, $geom_type:ident, $dim:path) => {{
1051                fields.push(Field::new(
1052                    $field_name,
1053                    $geom_type {
1054                        coord_type: self.coord_type,
1055                        dim: $dim,
1056                        metadata: Metadata::default().into(),
1057                    }
1058                    .data_type(),
1059                    true,
1060                ));
1061            }};
1062        }
1063
1064        push_field!("Point", PointType, Dimension::XY);
1065        push_field!("LineString", LineStringType, Dimension::XY);
1066        push_field!("Polygon", PolygonType, Dimension::XY);
1067        push_field!("MultiPoint", MultiPointType, Dimension::XY);
1068        push_field!("MultiLineString", MultiLineStringType, Dimension::XY);
1069        push_field!("MultiPolygon", MultiPolygonType, Dimension::XY);
1070        push_field!("GeometryCollection", GeometryCollectionType, Dimension::XY);
1071
1072        push_field!("Point Z", PointType, Dimension::XYZ);
1073        push_field!("LineString Z", LineStringType, Dimension::XYZ);
1074        push_field!("Polygon Z", PolygonType, Dimension::XYZ);
1075        push_field!("MultiPoint Z", MultiPointType, Dimension::XYZ);
1076        push_field!("MultiLineString Z", MultiLineStringType, Dimension::XYZ);
1077        push_field!("MultiPolygon Z", MultiPolygonType, Dimension::XYZ);
1078        push_field!(
1079            "GeometryCollection Z",
1080            GeometryCollectionType,
1081            Dimension::XYZ
1082        );
1083
1084        push_field!("Point M", PointType, Dimension::XYM);
1085        push_field!("LineString M", LineStringType, Dimension::XYM);
1086        push_field!("Polygon M", PolygonType, Dimension::XYM);
1087        push_field!("MultiPoint M", MultiPointType, Dimension::XYM);
1088        push_field!("MultiLineString M", MultiLineStringType, Dimension::XYM);
1089        push_field!("MultiPolygon M", MultiPolygonType, Dimension::XYM);
1090        push_field!(
1091            "GeometryCollection M",
1092            GeometryCollectionType,
1093            Dimension::XYM
1094        );
1095
1096        push_field!("Point ZM", PointType, Dimension::XYZM);
1097        push_field!("LineString ZM", LineStringType, Dimension::XYZM);
1098        push_field!("Polygon ZM", PolygonType, Dimension::XYZM);
1099        push_field!("MultiPoint ZM", MultiPointType, Dimension::XYZM);
1100        push_field!("MultiLineString ZM", MultiLineStringType, Dimension::XYZM);
1101        push_field!("MultiPolygon ZM", MultiPolygonType, Dimension::XYZM);
1102        push_field!(
1103            "GeometryCollection ZM",
1104            GeometryCollectionType,
1105            Dimension::XYZM
1106        );
1107
1108        let union_fields = UnionFields::try_new(type_ids, fields)
1109            .expect("type_ids and fields should have the same length");
1110        DataType::Union(union_fields, UnionMode::Dense)
1111    }
1112
1113    /// Convert this type to a [`Field`], retaining extension metadata.
1114    pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
1115        Field::new(name, self.data_type(), nullable).with_extension_type(self.clone())
1116    }
1117}
1118
1119impl ExtensionType for GeometryType {
1120    const NAME: &'static str = "geoarrow.geometry";
1121
1122    type Metadata = Arc<Metadata>;
1123
1124    fn metadata(&self) -> &Self::Metadata {
1125        self.metadata()
1126    }
1127
1128    fn serialize_metadata(&self) -> Option<String> {
1129        self.metadata.serialize()
1130    }
1131
1132    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1133        Ok(Arc::new(Metadata::deserialize(metadata)?))
1134    }
1135
1136    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1137        let coord_type = parse_geometry(data_type)?;
1138        if coord_type != self.coord_type {
1139            return Err(ArrowError::SchemaError(format!(
1140                "Expected coordinate type {:?}, but got {:?}",
1141                self.coord_type, coord_type
1142            )));
1143        }
1144        Ok(())
1145    }
1146
1147    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1148        let coord_type = parse_geometry(data_type)?;
1149        Ok(Self {
1150            coord_type,
1151            metadata,
1152        })
1153    }
1154}
1155
1156fn parse_geometry(data_type: &DataType) -> Result<CoordType, ArrowError> {
1157    if let DataType::Union(fields, _mode) = data_type {
1158        let mut coord_types: HashSet<CoordType> = HashSet::new();
1159
1160        // Validate that all fields of the union have the same coordinate type
1161        fields.iter().try_for_each(|(type_id, field)| {
1162            macro_rules! impl_type_id {
1163                ($expected_dim:path, $parse_fn:ident) => {{
1164                    let (ct, dim) = $parse_fn(field.data_type())?;
1165                    coord_types.insert(ct);
1166                    assert!(matches!(dim, $expected_dim));
1167                }};
1168            }
1169
1170            match type_id {
1171                1 => impl_type_id!(Dimension::XY, parse_point),
1172                2 => impl_type_id!(Dimension::XY, parse_linestring),
1173                3 => impl_type_id!(Dimension::XY, parse_polygon),
1174                4 => impl_type_id!(Dimension::XY, parse_multipoint),
1175                5 => impl_type_id!(Dimension::XY, parse_multilinestring),
1176                6 => impl_type_id!(Dimension::XY, parse_multipolygon),
1177                7 => impl_type_id!(Dimension::XY, parse_geometry_collection),
1178                11 => impl_type_id!(Dimension::XYZ, parse_point),
1179                12 => impl_type_id!(Dimension::XYZ, parse_linestring),
1180                13 => impl_type_id!(Dimension::XYZ, parse_polygon),
1181                14 => impl_type_id!(Dimension::XYZ, parse_multipoint),
1182                15 => impl_type_id!(Dimension::XYZ, parse_multilinestring),
1183                16 => impl_type_id!(Dimension::XYZ, parse_multipolygon),
1184                17 => impl_type_id!(Dimension::XYZ, parse_geometry_collection),
1185                21 => impl_type_id!(Dimension::XYM, parse_point),
1186                22 => impl_type_id!(Dimension::XYM, parse_linestring),
1187                23 => impl_type_id!(Dimension::XYM, parse_polygon),
1188                24 => impl_type_id!(Dimension::XYM, parse_multipoint),
1189                25 => impl_type_id!(Dimension::XYM, parse_multilinestring),
1190                26 => impl_type_id!(Dimension::XYM, parse_multipolygon),
1191                27 => impl_type_id!(Dimension::XYM, parse_geometry_collection),
1192                31 => impl_type_id!(Dimension::XYZM, parse_point),
1193                32 => impl_type_id!(Dimension::XYZM, parse_linestring),
1194                33 => impl_type_id!(Dimension::XYZM, parse_polygon),
1195                34 => impl_type_id!(Dimension::XYZM, parse_multipoint),
1196                35 => impl_type_id!(Dimension::XYZM, parse_multilinestring),
1197                36 => impl_type_id!(Dimension::XYZM, parse_multipolygon),
1198                37 => impl_type_id!(Dimension::XYZM, parse_geometry_collection),
1199                id => {
1200                    return Err(ArrowError::SchemaError(format!(
1201                        "Unexpected type id parsing geometry: {id}"
1202                    )));
1203                }
1204            };
1205            Ok::<_, ArrowError>(())
1206        })?;
1207
1208        if coord_types.len() > 1 {
1209            return Err(ArrowError::SchemaError(
1210                "Multi coord types in union".to_string(),
1211            ));
1212        }
1213
1214        let coord_type = coord_types.drain().next().unwrap();
1215        Ok(coord_type)
1216    } else {
1217        Err(ArrowError::SchemaError("Expected union type".to_string()))
1218    }
1219}
1220
1221/// A GeoArrow "Box" or "Rect" type.
1222///
1223/// Refer to the [GeoArrow
1224/// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#box).
1225#[derive(Debug, Clone, PartialEq, Eq, Hash)]
1226pub struct BoxType {
1227    dim: Dimension,
1228    metadata: Arc<Metadata>,
1229}
1230
1231impl BoxType {
1232    /// Construct a new type from parts.
1233    pub fn new(dim: Dimension, metadata: Arc<Metadata>) -> Self {
1234        Self { dim, metadata }
1235    }
1236
1237    /// Change the underlying [`Dimension`]
1238    pub fn with_dimension(self, dim: Dimension) -> Self {
1239        Self { dim, ..self }
1240    }
1241
1242    /// Change the underlying [`Metadata`]
1243    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1244        Self { metadata, ..self }
1245    }
1246
1247    /// Retrieve the underlying [`CoordType`]
1248    pub fn dimension(&self) -> Dimension {
1249        self.dim
1250    }
1251
1252    /// Retrieve the underlying [`Metadata`]
1253    pub fn metadata(&self) -> &Arc<Metadata> {
1254        &self.metadata
1255    }
1256
1257    /// Convert to the corresponding [`DataType`].
1258    ///
1259    /// ```
1260    /// use arrow_schema::{DataType, Field};
1261    /// use geoarrow_schema::{BoxType, Dimension};
1262    ///
1263    /// let geom_type = BoxType::new(Dimension::XYZM, Default::default());
1264    ///
1265    /// let expected_type = DataType::Struct(
1266    ///     vec![
1267    ///         Field::new("xmin", DataType::Float64, false),
1268    ///         Field::new("ymin", DataType::Float64, false),
1269    ///         Field::new("zmin", DataType::Float64, false),
1270    ///         Field::new("mmin", DataType::Float64, false),
1271    ///         Field::new("xmax", DataType::Float64, false),
1272    ///         Field::new("ymax", DataType::Float64, false),
1273    ///         Field::new("zmax", DataType::Float64, false),
1274    ///         Field::new("mmax", DataType::Float64, false),
1275    ///     ]
1276    ///     .into(),
1277    /// );
1278    /// assert_eq!(geom_type.data_type(), expected_type);
1279    /// ```
1280    pub fn data_type(&self) -> DataType {
1281        let values_fields = match self.dim {
1282            Dimension::XY => {
1283                vec![
1284                    Field::new("xmin", DataType::Float64, false),
1285                    Field::new("ymin", DataType::Float64, false),
1286                    Field::new("xmax", DataType::Float64, false),
1287                    Field::new("ymax", DataType::Float64, false),
1288                ]
1289            }
1290            Dimension::XYZ => {
1291                vec![
1292                    Field::new("xmin", DataType::Float64, false),
1293                    Field::new("ymin", DataType::Float64, false),
1294                    Field::new("zmin", DataType::Float64, false),
1295                    Field::new("xmax", DataType::Float64, false),
1296                    Field::new("ymax", DataType::Float64, false),
1297                    Field::new("zmax", DataType::Float64, false),
1298                ]
1299            }
1300            Dimension::XYM => {
1301                vec![
1302                    Field::new("xmin", DataType::Float64, false),
1303                    Field::new("ymin", DataType::Float64, false),
1304                    Field::new("mmin", DataType::Float64, false),
1305                    Field::new("xmax", DataType::Float64, false),
1306                    Field::new("ymax", DataType::Float64, false),
1307                    Field::new("mmax", DataType::Float64, false),
1308                ]
1309            }
1310            Dimension::XYZM => {
1311                vec![
1312                    Field::new("xmin", DataType::Float64, false),
1313                    Field::new("ymin", DataType::Float64, false),
1314                    Field::new("zmin", DataType::Float64, false),
1315                    Field::new("mmin", DataType::Float64, false),
1316                    Field::new("xmax", DataType::Float64, false),
1317                    Field::new("ymax", DataType::Float64, false),
1318                    Field::new("zmax", DataType::Float64, false),
1319                    Field::new("mmax", DataType::Float64, false),
1320                ]
1321            }
1322        };
1323        DataType::Struct(values_fields.into())
1324    }
1325
1326    /// Convert this type to a [`Field`], retaining extension metadata.
1327    pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
1328        Field::new(name, self.data_type(), nullable).with_extension_type(self.clone())
1329    }
1330}
1331
1332impl ExtensionType for BoxType {
1333    const NAME: &'static str = "geoarrow.box";
1334
1335    type Metadata = Arc<Metadata>;
1336
1337    fn metadata(&self) -> &Self::Metadata {
1338        self.metadata()
1339    }
1340
1341    fn serialize_metadata(&self) -> Option<String> {
1342        self.metadata.serialize()
1343    }
1344
1345    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1346        Ok(Arc::new(Metadata::deserialize(metadata)?))
1347    }
1348
1349    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1350        let dim = parse_box(data_type)?;
1351        if dim != self.dim {
1352            return Err(ArrowError::SchemaError(format!(
1353                "Expected dimension {:?}, but got {:?}",
1354                self.dim, dim
1355            )));
1356        }
1357        Ok(())
1358    }
1359
1360    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1361        let dim = parse_box(data_type)?;
1362        Ok(Self { dim, metadata })
1363    }
1364}
1365
1366fn parse_box(data_type: &DataType) -> Result<Dimension, ArrowError> {
1367    match data_type {
1368        DataType::Struct(struct_fields) => match struct_fields.len() {
1369            4 => Ok(Dimension::XY),
1370            6 => {
1371                let names: HashSet<&str> =
1372                    struct_fields.iter().map(|f| f.name().as_str()).collect();
1373                if names.contains("mmin") && names.contains("mmax") {
1374                    Ok(Dimension::XYM)
1375                } else if names.contains("zmin") && names.contains("zmax") {
1376                    Ok(Dimension::XYZ)
1377                } else {
1378                    Err(ArrowError::SchemaError(format!(
1379                        "unexpected either mmin and mmax or zmin and zmax for struct with 6 fields. Got names: {names:?}",
1380                    )))
1381                }
1382            }
1383            8 => Ok(Dimension::XYZM),
1384            num_fields => Err(ArrowError::SchemaError(format!(
1385                "unexpected number of struct fields: {num_fields}",
1386            ))),
1387        },
1388        dt => Err(ArrowError::SchemaError(format!(
1389            "unexpected data type parsing box: {dt:?}",
1390        ))),
1391    }
1392}
1393
1394/// A type alias for [`BoxType`].
1395///
1396/// The official GeoArrow specification refers to this type as "geoarrow.box", but `Box` is a
1397/// reserved keyword in Rust and has its own meaning. In line with GeoRust, GeoArrow Rust calls
1398/// this type `Rect`.
1399pub type RectType = BoxType;
1400
1401/// A GeoArrow WKB type.
1402///
1403/// This extension type support multiple physical data types, including [`DataType::Binary`],
1404/// [`DataType::LargeBinary`], and [`DataType::BinaryView`].
1405#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
1406pub struct WkbType {
1407    metadata: Arc<Metadata>,
1408}
1409
1410impl WkbType {
1411    /// Construct a new type from parts.
1412    pub fn new(metadata: Arc<Metadata>) -> Self {
1413        Self { metadata }
1414    }
1415
1416    /// Change the underlying [`Metadata`]
1417    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1418        Self { metadata }
1419    }
1420
1421    /// Retrieve the underlying [`Metadata`]
1422    pub fn metadata(&self) -> &Arc<Metadata> {
1423        &self.metadata
1424    }
1425}
1426
1427impl ExtensionType for WkbType {
1428    const NAME: &'static str = "geoarrow.wkb";
1429
1430    type Metadata = Arc<Metadata>;
1431
1432    fn metadata(&self) -> &Self::Metadata {
1433        self.metadata()
1434    }
1435
1436    fn serialize_metadata(&self) -> Option<String> {
1437        self.metadata.serialize()
1438    }
1439
1440    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1441        Ok(Arc::new(Metadata::deserialize(metadata)?))
1442    }
1443
1444    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1445        match data_type {
1446            DataType::Binary | DataType::LargeBinary | DataType::BinaryView => Ok(()),
1447            dt => Err(ArrowError::SchemaError(format!(
1448                "Unexpected data type {dt}"
1449            ))),
1450        }
1451    }
1452
1453    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1454        let wkb = Self { metadata };
1455        wkb.supports_data_type(data_type)?;
1456        Ok(wkb)
1457    }
1458}
1459
1460/// A GeoArrow WKT type.
1461///
1462/// This extension type support multiple physical data types, including [`DataType::Utf8`],
1463/// [`DataType::LargeUtf8`], and [`DataType::Utf8View`].
1464#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
1465pub struct WktType {
1466    metadata: Arc<Metadata>,
1467}
1468
1469impl WktType {
1470    /// Construct a new type from parts.
1471    pub fn new(metadata: Arc<Metadata>) -> Self {
1472        Self { metadata }
1473    }
1474
1475    /// Change the underlying [`Metadata`]
1476    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
1477        Self { metadata }
1478    }
1479
1480    /// Retrieve the underlying [`Metadata`]
1481    pub fn metadata(&self) -> &Arc<Metadata> {
1482        &self.metadata
1483    }
1484}
1485
1486impl ExtensionType for WktType {
1487    const NAME: &'static str = "geoarrow.wkt";
1488
1489    type Metadata = Arc<Metadata>;
1490
1491    fn metadata(&self) -> &Self::Metadata {
1492        self.metadata()
1493    }
1494
1495    fn serialize_metadata(&self) -> Option<String> {
1496        self.metadata.serialize()
1497    }
1498
1499    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1500        Ok(Arc::new(Metadata::deserialize(metadata)?))
1501    }
1502
1503    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
1504        match data_type {
1505            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Ok(()),
1506            dt => Err(ArrowError::SchemaError(format!(
1507                "Unexpected data type {dt}"
1508            ))),
1509        }
1510    }
1511
1512    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
1513        let wkb = Self { metadata };
1514        wkb.supports_data_type(data_type)?;
1515        Ok(wkb)
1516    }
1517}
1518
1519fn coord_type_to_data_type(coord_type: CoordType, dim: Dimension) -> DataType {
1520    match (coord_type, dim) {
1521        (CoordType::Interleaved, Dimension::XY) => INTERLEAVED_XY.clone(),
1522
1523        (CoordType::Interleaved, Dimension::XYZ) => INTERLEAVED_XYZ.clone(),
1524
1525        (CoordType::Interleaved, Dimension::XYM) => INTERLEAVED_XYM.clone(),
1526        (CoordType::Interleaved, Dimension::XYZM) => INTERLEAVED_XYZM.clone(),
1527        (CoordType::Separated, Dimension::XY) => SEPARATED_XY.clone(),
1528        (CoordType::Separated, Dimension::XYZ) => SEPARATED_XYZ.clone(),
1529        (CoordType::Separated, Dimension::XYM) => SEPARATED_XYM.clone(),
1530        (CoordType::Separated, Dimension::XYZM) => SEPARATED_XYZM.clone(),
1531    }
1532}
1533
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_schema::{DataType, Field};

    use super::*;
    use crate::crs::Crs;
    use crate::edges::Edges;

    /// Interleaved XY storage: a fixed-size list of two non-nullable floats named "xy".
    fn interleaved_xy_storage() -> DataType {
        let values = Field::new("xy", DataType::Float64, false);
        DataType::FixedSizeList(Arc::new(values), 2)
    }

    /// An interleaved XY fixed-size list parses as an interleaved XY point with no
    /// serialized metadata.
    #[test]
    fn test_point_interleaved_xy() {
        let storage = interleaved_xy_storage();
        let point = PointType::try_new(&storage, Arc::new(Metadata::default())).unwrap();

        assert_eq!(point.coord_type, CoordType::Interleaved);
        assert_eq!(point.dim, Dimension::XY);
        assert_eq!(point.serialize_metadata(), None);
    }

    /// A struct with x/y/z float fields parses as a separated XYZ point with no serialized
    /// metadata.
    #[test]
    fn test_point_separated_xyz() {
        let axes: Vec<Field> = ["x", "y", "z"]
            .iter()
            .map(|axis| Field::new(*axis, DataType::Float64, false))
            .collect();
        let storage = DataType::Struct(axes.into());
        let point = PointType::try_new(&storage, Arc::new(Metadata::default())).unwrap();

        assert_eq!(point.coord_type, CoordType::Separated);
        assert_eq!(point.dim, Dimension::XYZ);
        assert_eq!(point.serialize_metadata(), None);
    }

    /// CRS and edge information survive into the serialized extension metadata.
    #[test]
    fn test_point_metadata() {
        let storage = interleaved_xy_storage();
        let crs = Crs::from_authority_code("EPSG:4326".to_string());
        let metadata = Arc::new(Metadata::new(crs, Some(Edges::Spherical)));
        let point = PointType::try_new(&storage, metadata).unwrap();

        let expected = r#"{"crs":"EPSG:4326","crs_type":"authority_code","edges":"spherical"}"#;
        assert_eq!(point.serialize_metadata().as_deref(), Some(expected));
    }

    /// Smoke test: building the geometry collection data type must not panic. The result is
    /// only printed, not asserted on.
    #[test]
    fn geometry_data_type() {
        let collection = GeometryCollectionType::new(Dimension::XY, Default::default());
        dbg!(collection.data_type());
    }
}