geoarrow_schema/
datatype.rs

//! Contains the implementation of [`GeoArrowType`], which enumerates all geometry array data
//! types supported by this crate.
3
4use std::sync::Arc;
5
6use arrow_schema::extension::ExtensionType;
7use arrow_schema::{DataType, Field};
8
9use crate::error::{GeoArrowError, GeoArrowResult};
10use crate::{
11    BoxType, CoordType, Dimension, GeometryCollectionType, GeometryType, LineStringType, Metadata,
12    MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType, WkbType,
13    WktType,
14};
15
/// Geospatial data types supported by GeoArrow.
///
/// The variants of this enum include all possible GeoArrow geometry types, including both "native"
/// and "serialized" encodings.
///
/// Each variant uniquely identifies the physical buffer layout for the respective array type.
///
/// Every variant wraps a type descriptor from which the GeoArrow [`Metadata`] can be retrieved
/// (see [`GeoArrowType::metadata`]). The WKB and WKT families each share a single descriptor
/// type ([`WkbType`] / [`WktType`]); within a family the variants differ only in the Arrow
/// storage type reported by [`GeoArrowType::to_data_type`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum GeoArrowType {
    /// A Point, described by a [`PointType`].
    Point(PointType),

    /// A LineString, described by a [`LineStringType`].
    LineString(LineStringType),

    /// A Polygon, described by a [`PolygonType`].
    Polygon(PolygonType),

    /// A MultiPoint, described by a [`MultiPointType`].
    MultiPoint(MultiPointType),

    /// A MultiLineString, described by a [`MultiLineStringType`].
    MultiLineString(MultiLineStringType),

    /// A MultiPolygon, described by a [`MultiPolygonType`].
    MultiPolygon(MultiPolygonType),

    /// A GeometryCollection, described by a [`GeometryCollectionType`].
    GeometryCollection(GeometryCollectionType),

    /// A Rect, described by a [`BoxType`].
    ///
    /// [`GeoArrowType::coord_type`] always reports [`CoordType::Separated`] for this variant, and
    /// [`GeoArrowType::with_coord_type`] leaves it unchanged.
    Rect(BoxType),

    /// A Geometry with unknown types or dimensions.
    ///
    /// [`GeoArrowType::dimension`] returns `None` for this variant, and
    /// [`GeoArrowType::with_dimension`] leaves it unchanged.
    Geometry(GeometryType),

    /// A WKB stored in a `BinaryArray` with `i32` offsets.
    ///
    /// Maps to [`DataType::Binary`].
    Wkb(WkbType),

    /// A WKB stored in a `LargeBinaryArray` with `i64` offsets.
    ///
    /// Maps to [`DataType::LargeBinary`].
    LargeWkb(WkbType),

    /// A WKB stored in a `BinaryViewArray`.
    ///
    /// Maps to [`DataType::BinaryView`].
    WkbView(WkbType),

    /// A WKT stored in a `StringArray` with `i32` offsets.
    ///
    /// Maps to [`DataType::Utf8`].
    Wkt(WktType),

    /// A WKT stored in a `LargeStringArray` with `i64` offsets.
    ///
    /// Maps to [`DataType::LargeUtf8`].
    LargeWkt(WktType),

    /// A WKT stored in a `StringViewArray`.
    ///
    /// Maps to [`DataType::Utf8View`].
    WktView(WktType),
}
69
70impl From<GeoArrowType> for DataType {
71    fn from(value: GeoArrowType) -> Self {
72        value.to_data_type()
73    }
74}
75
76impl GeoArrowType {
77    /// Get the [`CoordType`] of this data type.
78    ///
79    /// WKB and WKT variants will return `None`.
80    pub fn coord_type(&self) -> Option<CoordType> {
81        use GeoArrowType::*;
82        match self {
83            Point(t) => Some(t.coord_type()),
84            LineString(t) => Some(t.coord_type()),
85            Polygon(t) => Some(t.coord_type()),
86            MultiPoint(t) => Some(t.coord_type()),
87            MultiLineString(t) => Some(t.coord_type()),
88            MultiPolygon(t) => Some(t.coord_type()),
89            GeometryCollection(t) => Some(t.coord_type()),
90            Rect(_) => Some(CoordType::Separated),
91            Geometry(t) => Some(t.coord_type()),
92            Wkb(_) | LargeWkb(_) | WkbView(_) | Wkt(_) | LargeWkt(_) | WktView(_) => None,
93        }
94    }
95
96    /// Get the [`Dimension`] of this data type, if it has one.
97    ///
98    /// [`Geometry`][Self::Geometry] and WKB and WKT variants will return `None`.
99    pub fn dimension(&self) -> Option<Dimension> {
100        use GeoArrowType::*;
101        match self {
102            Point(t) => Some(t.dimension()),
103            LineString(t) => Some(t.dimension()),
104            Polygon(t) => Some(t.dimension()),
105            MultiPoint(t) => Some(t.dimension()),
106            MultiLineString(t) => Some(t.dimension()),
107            MultiPolygon(t) => Some(t.dimension()),
108            GeometryCollection(t) => Some(t.dimension()),
109            Rect(t) => Some(t.dimension()),
110            Geometry(_) | Wkb(_) | LargeWkb(_) | WkbView(_) | Wkt(_) | LargeWkt(_) | WktView(_) => {
111                None
112            }
113        }
114    }
115
116    /// Returns the [Metadata] contained within this type.
117    pub fn metadata(&self) -> &Arc<Metadata> {
118        use GeoArrowType::*;
119        match self {
120            Point(t) => t.metadata(),
121            LineString(t) => t.metadata(),
122            Polygon(t) => t.metadata(),
123            MultiPoint(t) => t.metadata(),
124            MultiLineString(t) => t.metadata(),
125            MultiPolygon(t) => t.metadata(),
126            GeometryCollection(t) => t.metadata(),
127            Rect(t) => t.metadata(),
128            Geometry(t) => t.metadata(),
129            Wkb(t) | LargeWkb(t) | WkbView(t) => t.metadata(),
130            Wkt(t) | LargeWkt(t) | WktView(t) => t.metadata(),
131        }
132    }
133    /// Converts a [`GeoArrowType`] into the relevant arrow [`DataType`].
134    ///
135    /// Note that an arrow [`DataType`] will lose the accompanying GeoArrow metadata if it is not
136    /// part of a [`Field`] with GeoArrow extension metadata in its field metadata.
137    ///
138    /// # Examples
139    ///
140    /// ```
141    /// # use arrow_schema::DataType;
142    /// # use geoarrow_schema::{Dimension, GeoArrowType, PointType};
143    /// #
144    /// let point_type = PointType::new(Dimension::XY, Default::default());
145    /// let data_type = GeoArrowType::Point(point_type).to_data_type();
146    /// assert!(matches!(data_type, DataType::Struct(_)));
147    /// ```
148    pub fn to_data_type(&self) -> DataType {
149        use GeoArrowType::*;
150        match self {
151            Point(t) => t.data_type(),
152            LineString(t) => t.data_type(),
153            Polygon(t) => t.data_type(),
154            MultiPoint(t) => t.data_type(),
155            MultiLineString(t) => t.data_type(),
156            MultiPolygon(t) => t.data_type(),
157            GeometryCollection(t) => t.data_type(),
158            Rect(t) => t.data_type(),
159            Geometry(t) => t.data_type(),
160            Wkb(_) => DataType::Binary,
161            LargeWkb(_) => DataType::LargeBinary,
162            WkbView(_) => DataType::BinaryView,
163            Wkt(_) => DataType::Utf8,
164            LargeWkt(_) => DataType::LargeUtf8,
165            WktView(_) => DataType::Utf8View,
166        }
167    }
168
169    /// Converts this [`GeoArrowType`] into an arrow [`Field`], maintaining GeoArrow extension
170    /// metadata.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// # use geoarrow_schema::{Dimension, GeoArrowType, PointType};
176    /// #
177    /// let point_type = PointType::new(Dimension::XY, Default::default());
178    /// let geoarrow_type = GeoArrowType::Point(point_type);
179    /// let field = geoarrow_type.to_field("geometry", true);
180    /// assert_eq!(field.name(), "geometry");
181    /// assert!(field.is_nullable());
182    /// assert_eq!(field.metadata()["ARROW:extension:name"], "geoarrow.point");
183    /// ```
184    pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
185        use GeoArrowType::*;
186        match self {
187            Point(t) => t.to_field(name, nullable),
188            LineString(t) => t.to_field(name, nullable),
189            Polygon(t) => t.to_field(name, nullable),
190            MultiPoint(t) => t.to_field(name, nullable),
191            MultiLineString(t) => t.to_field(name, nullable),
192            MultiPolygon(t) => t.to_field(name, nullable),
193            GeometryCollection(t) => t.to_field(name, nullable),
194            Rect(t) => t.to_field(name, nullable),
195            Geometry(t) => t.to_field(name, nullable),
196            Wkb(t) | LargeWkb(t) | WkbView(t) => {
197                Field::new(name, self.to_data_type(), nullable).with_extension_type(t.clone())
198            }
199            Wkt(t) | LargeWkt(t) | WktView(t) => {
200                Field::new(name, self.to_data_type(), nullable).with_extension_type(t.clone())
201            }
202        }
203    }
204
205    /// Applies the provided [CoordType] onto self.
206    ///
207    /// [`Rect`][Self::Rect] and WKB and WKT variants will return the same type as they do not have
208    /// a parameterized coordinate types.
209    ///
210    /// # Examples
211    ///
212    /// ```
213    /// # use geoarrow_schema::{CoordType, Dimension, GeoArrowType, PointType};
214    /// #
215    /// let point_type = PointType::new(Dimension::XY, Default::default());
216    /// let geoarrow_type = GeoArrowType::Point(point_type);
217    /// let new_type = geoarrow_type.with_coord_type(CoordType::Separated);
218    ///
219    /// assert_eq!(new_type.coord_type(), Some(CoordType::Separated));
220    /// ```
221    pub fn with_coord_type(self, coord_type: CoordType) -> GeoArrowType {
222        use GeoArrowType::*;
223        match self {
224            Point(t) => Point(t.with_coord_type(coord_type)),
225            LineString(t) => LineString(t.with_coord_type(coord_type)),
226            Polygon(t) => Polygon(t.with_coord_type(coord_type)),
227            MultiPoint(t) => MultiPoint(t.with_coord_type(coord_type)),
228            MultiLineString(t) => MultiLineString(t.with_coord_type(coord_type)),
229            MultiPolygon(t) => MultiPolygon(t.with_coord_type(coord_type)),
230            GeometryCollection(t) => GeometryCollection(t.with_coord_type(coord_type)),
231            Rect(t) => Rect(t),
232            Geometry(t) => Geometry(t.with_coord_type(coord_type)),
233            _ => self,
234        }
235    }
236
237    /// Applies the provided [Dimension] onto self.
238    ///
239    /// [`Geometry`][Self::Geometry] and WKB and WKT variants will return the same type as they do
240    /// not have a parameterized dimension.
241    ///
242    /// # Examples
243    ///
244    /// ```
245    /// # use geoarrow_schema::{Dimension, GeoArrowType, PointType};
246    /// #
247    /// let point_type = PointType::new(Dimension::XY, Default::default());
248    /// let geoarrow_type = GeoArrowType::Point(point_type);
249    /// let new_type = geoarrow_type.with_dimension(Dimension::XYZ);
250    ///
251    /// assert_eq!(new_type.dimension(), Some(Dimension::XYZ));
252    /// ```
253    pub fn with_dimension(self, dim: Dimension) -> GeoArrowType {
254        use GeoArrowType::*;
255        match self {
256            Point(t) => Point(t.with_dimension(dim)),
257            LineString(t) => LineString(t.with_dimension(dim)),
258            Polygon(t) => Polygon(t.with_dimension(dim)),
259            MultiPoint(t) => MultiPoint(t.with_dimension(dim)),
260            MultiLineString(t) => MultiLineString(t.with_dimension(dim)),
261            MultiPolygon(t) => MultiPolygon(t.with_dimension(dim)),
262            GeometryCollection(t) => GeometryCollection(t.with_dimension(dim)),
263            Rect(t) => Rect(t.with_dimension(dim)),
264            Geometry(t) => Geometry(t),
265            _ => self,
266        }
267    }
268
269    /// Applies the provided [Metadata] onto self.
270    pub fn with_metadata(self, meta: Arc<Metadata>) -> GeoArrowType {
271        use GeoArrowType::*;
272        match self {
273            Point(t) => Point(t.with_metadata(meta)),
274            LineString(t) => LineString(t.with_metadata(meta)),
275            Polygon(t) => Polygon(t.with_metadata(meta)),
276            MultiPoint(t) => MultiPoint(t.with_metadata(meta)),
277            MultiLineString(t) => MultiLineString(t.with_metadata(meta)),
278            MultiPolygon(t) => MultiPolygon(t.with_metadata(meta)),
279            GeometryCollection(t) => GeometryCollection(t.with_metadata(meta)),
280            Rect(t) => Rect(t.with_metadata(meta)),
281            Geometry(t) => Geometry(t.with_metadata(meta)),
282            Wkb(t) => Wkb(t.with_metadata(meta)),
283            LargeWkb(t) => LargeWkb(t.with_metadata(meta)),
284            WkbView(t) => WkbView(t.with_metadata(meta)),
285            Wkt(t) => Wkt(t.with_metadata(meta)),
286            LargeWkt(t) => LargeWkt(t.with_metadata(meta)),
287            WktView(t) => WktView(t.with_metadata(meta)),
288        }
289    }
290
291    /// Create a new [`GeoArrowType`] from an Arrow [`Field`], requiring GeoArrow metadata to be
292    /// set.
293    ///
294    /// If the field does not have at least a GeoArrow extension name, an error will be returned.
295    ///
296    /// Create a new [`GeoArrowType`] from an Arrow [`Field`].
297    ///
298    /// This method requires GeoArrow metadata to be correctly set. If you wish to allow data type
299    /// coercion without GeoArrow metadata, use [`GeoArrowType::from_arrow_field`] instead.
300    ///
301    /// - An `Ok(Some(_))` return value indicates that the field has valid GeoArrow extension metadata, and thus was able to match to a specific GeoArrow type.
302    /// - An `Ok(None)` return value indicates that the field either does not have any Arrow extension name or the extension name is not a GeoArrow extension name.
303    /// - An `Err` return value indicates that the field has a GeoArrow extension name, but it is
304    ///   invalid. This can happen if the field's [`DataType`] is not compatible with the allowed
305    ///   types for the given GeoArrow type, or if the GeoArrow metadata is malformed.
306    pub fn from_extension_field(field: &Field) -> GeoArrowResult<Option<Self>> {
307        if let Some(extension_name) = field.extension_type_name() {
308            use GeoArrowType::*;
309            let data_type = match extension_name {
310                PointType::NAME => Point(field.try_extension_type()?),
311                LineStringType::NAME => LineString(field.try_extension_type()?),
312                PolygonType::NAME => Polygon(field.try_extension_type()?),
313                MultiPointType::NAME => MultiPoint(field.try_extension_type()?),
314                MultiLineStringType::NAME => MultiLineString(field.try_extension_type()?),
315                MultiPolygonType::NAME => MultiPolygon(field.try_extension_type()?),
316                GeometryCollectionType::NAME => GeometryCollection(field.try_extension_type()?),
317                BoxType::NAME => Rect(field.try_extension_type()?),
318                GeometryType::NAME => Geometry(field.try_extension_type()?),
319                WkbType::NAME => match field.data_type() {
320                    DataType::Binary => Wkb(field.try_extension_type()?),
321                    DataType::LargeBinary => LargeWkb(field.try_extension_type()?),
322                    DataType::BinaryView => WkbView(field.try_extension_type()?),
323                    _ => {
324                        return Err(GeoArrowError::InvalidGeoArrow(format!(
325                            "Expected binary type for a field with extension name 'geoarrow.wkb', got '{}'",
326                            field.data_type()
327                        )));
328                    }
329                },
330                WktType::NAME => match field.data_type() {
331                    DataType::Utf8 => Wkt(field.try_extension_type()?),
332                    DataType::LargeUtf8 => LargeWkt(field.try_extension_type()?),
333                    DataType::Utf8View => WktView(field.try_extension_type()?),
334                    _ => {
335                        return Err(GeoArrowError::InvalidGeoArrow(format!(
336                            "Expected string type for a field with extension name 'geoarrow.wkt', got '{}'",
337                            field.data_type()
338                        )));
339                    }
340                },
341                _ => return Ok(None),
342            };
343            Ok(Some(data_type))
344        } else {
345            Ok(None)
346        }
347    }
348
349    /// Create a new [`GeoArrowType`] from an Arrow [`Field`], inferring the GeoArrow type if
350    /// GeoArrow metadata is not present.
351    ///
352    /// This will first try [`GeoArrowType::from_extension_field`], and if that fails, will try to
353    /// infer the GeoArrow type from the field's [DataType]. This only works for Point, WKB, and
354    /// WKT types, as those are the only types that can be unambiguously inferred from an Arrow
355    /// [DataType].
356    pub fn from_arrow_field(field: &Field) -> GeoArrowResult<Self> {
357        use GeoArrowType::*;
358        if let Some(geo_type) = Self::from_extension_field(field)? {
359            Ok(geo_type)
360        } else {
361            let metadata = Arc::new(Metadata::try_from(field)?);
362            let data_type = match field.data_type() {
363                DataType::Struct(struct_fields) => {
364                    if !struct_fields.iter().all(|f| matches!(f.data_type(), DataType::Float64) ) {
365                        return Err(GeoArrowError::InvalidGeoArrow("all struct fields must be Float64 when inferring point type.".to_string()));
366                    }
367
368                    match struct_fields.len() {
369                        2 => GeoArrowType::Point(PointType::new( Dimension::XY, metadata).with_coord_type(CoordType::Separated)),
370                        3 => GeoArrowType::Point(PointType::new( Dimension::XYZ, metadata).with_coord_type(CoordType::Separated)),
371                        4 => GeoArrowType::Point(PointType::new( Dimension::XYZM, metadata).with_coord_type(CoordType::Separated)),
372                        l => return Err(GeoArrowError::InvalidGeoArrow(format!("invalid number of struct fields: {l}"))),
373                    }
374                },
375                DataType::FixedSizeList(inner_field, list_size) => {
376                    if !matches!(inner_field.data_type(), DataType::Float64 )  {
377                        return Err(GeoArrowError::InvalidGeoArrow(format!("invalid inner field type of fixed size list: {}", inner_field.data_type())));
378                    }
379
380                    match list_size {
381                        2 => GeoArrowType::Point(PointType::new(Dimension::XY, metadata).with_coord_type(CoordType::Interleaved)),
382                        3 => GeoArrowType::Point(PointType::new(Dimension::XYZ, metadata).with_coord_type(CoordType::Interleaved)),
383                        4 => GeoArrowType::Point(PointType::new(Dimension::XYZM, metadata).with_coord_type(CoordType::Interleaved)),
384                        _ => return Err(GeoArrowError::InvalidGeoArrow(format!("invalid list_size: {list_size}"))),
385                    }
386                },
387                DataType::Binary => Wkb(WkbType::new(metadata)),
388                DataType::LargeBinary => LargeWkb(WkbType::new(metadata)),
389                DataType::BinaryView => WkbView(WkbType::new(metadata)),
390                DataType::Utf8 => Wkt(WktType::new(metadata)),
391                DataType::LargeUtf8 => LargeWkt(WktType::new(metadata)),
392                DataType::Utf8View => WktView(WktType::new(metadata)),
393                _ => return Err(GeoArrowError::InvalidGeoArrow("Only FixedSizeList, Struct, Binary, LargeBinary, BinaryView, String, LargeString, and StringView arrays are unambigously typed for a GeoArrow type and can be used without extension metadata.\nEnsure your array input has GeoArrow metadata.".to_string())),
394            };
395
396            Ok(data_type)
397        }
398    }
399}
400
401macro_rules! impl_into_geoarrowtype {
402    ($source_type:ident, $variant:expr) => {
403        impl From<$source_type> for GeoArrowType {
404            fn from(value: $source_type) -> Self {
405                $variant(value)
406            }
407        }
408    };
409}
410
411impl_into_geoarrowtype!(PointType, GeoArrowType::Point);
412impl_into_geoarrowtype!(LineStringType, GeoArrowType::LineString);
413impl_into_geoarrowtype!(PolygonType, GeoArrowType::Polygon);
414impl_into_geoarrowtype!(MultiPointType, GeoArrowType::MultiPoint);
415impl_into_geoarrowtype!(MultiLineStringType, GeoArrowType::MultiLineString);
416impl_into_geoarrowtype!(MultiPolygonType, GeoArrowType::MultiPolygon);
417impl_into_geoarrowtype!(GeometryCollectionType, GeoArrowType::GeometryCollection);
418impl_into_geoarrowtype!(BoxType, GeoArrowType::Rect);
419impl_into_geoarrowtype!(GeometryType, GeoArrowType::Geometry);
420
421impl TryFrom<&Field> for GeoArrowType {
422    type Error = GeoArrowError;
423
424    fn try_from(field: &Field) -> GeoArrowResult<Self> {
425        if let Some(geo_type) = Self::from_extension_field(field)? {
426            Ok(geo_type)
427        } else {
428            Err(GeoArrowError::InvalidGeoArrow(
429                "Expected GeoArrow extension metadata, found none or unsupported extension."
430                    .to_string(),
431            ))
432        }
433    }
434}