geoarrow_schema/
datatype.rs

1//! Contains the implementation of [`GeoArrowType`], which defines all geometry arrays in this
2//! crate.
3
4use std::sync::Arc;
5
6use arrow_schema::extension::ExtensionType;
7use arrow_schema::{DataType, Field};
8
9use crate::error::{GeoArrowError, GeoArrowResult};
10use crate::{
11    BoxType, CoordType, Dimension, GeometryCollectionType, GeometryType, LineStringType, Metadata,
12    MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType, WkbType,
13    WktType,
14};
15
16/// Geospatial data types supported by GeoArrow.
17///
18/// The variants of this enum include all possible GeoArrow geometry types, including both "native"
19/// and "serialized" encodings.
20///
21/// Each variant uniquely identifies the physical buffer layout for the respective array type.
22#[derive(Debug, Clone, PartialEq, Eq, Hash)]
23pub enum GeoArrowType {
24    /// A Point.
25    Point(PointType),
26
27    /// A LineString.
28    LineString(LineStringType),
29
30    /// A Polygon.
31    Polygon(PolygonType),
32
33    /// A MultiPoint.
34    MultiPoint(MultiPointType),
35
36    /// A MultiLineString.
37    MultiLineString(MultiLineStringType),
38
39    /// A MultiPolygon.
40    MultiPolygon(MultiPolygonType),
41
42    /// A GeometryCollection.
43    GeometryCollection(GeometryCollectionType),
44
45    /// A Rect.
46    Rect(BoxType),
47
48    /// A Geometry with unknown types or dimensions.
49    Geometry(GeometryType),
50
51    /// A WKB stored in a `BinaryArray` with `i32` offsets.
52    Wkb(WkbType),
53
54    /// A WKB stored in a `LargeBinaryArray` with `i64` offsets.
55    LargeWkb(WkbType),
56
57    /// A WKB stored in a `BinaryViewArray`.
58    WkbView(WkbType),
59
60    /// A WKT stored in a `StringArray` with `i32` offsets.
61    Wkt(WktType),
62
63    /// A WKT stored in a `LargeStringArray` with `i64` offsets.
64    LargeWkt(WktType),
65
66    /// A WKT stored in a `StringViewArray`.
67    WktView(WktType),
68}
69
70impl From<GeoArrowType> for DataType {
71    fn from(value: GeoArrowType) -> Self {
72        value.to_data_type()
73    }
74}
75
76impl GeoArrowType {
77    /// Get the [`CoordType`] of this data type.
78    ///
79    /// WKB and WKT variants will return `None`.
80    pub fn coord_type(&self) -> Option<CoordType> {
81        use GeoArrowType::*;
82        match self {
83            Point(t) => Some(t.coord_type()),
84            LineString(t) => Some(t.coord_type()),
85            Polygon(t) => Some(t.coord_type()),
86            MultiPoint(t) => Some(t.coord_type()),
87            MultiLineString(t) => Some(t.coord_type()),
88            MultiPolygon(t) => Some(t.coord_type()),
89            GeometryCollection(t) => Some(t.coord_type()),
90            Rect(_) => Some(CoordType::Separated),
91            Geometry(t) => Some(t.coord_type()),
92            Wkb(_) | LargeWkb(_) | WkbView(_) | Wkt(_) | LargeWkt(_) | WktView(_) => None,
93        }
94    }
95
96    /// Get the [`Dimension`] of this data type, if it has one.
97    ///
98    /// [`Geometry`][Self::Geometry] and WKB and WKT variants will return `None`.
99    pub fn dimension(&self) -> Option<Dimension> {
100        use GeoArrowType::*;
101        match self {
102            Point(t) => Some(t.dimension()),
103            LineString(t) => Some(t.dimension()),
104            Polygon(t) => Some(t.dimension()),
105            MultiPoint(t) => Some(t.dimension()),
106            MultiLineString(t) => Some(t.dimension()),
107            MultiPolygon(t) => Some(t.dimension()),
108            GeometryCollection(t) => Some(t.dimension()),
109            Rect(t) => Some(t.dimension()),
110            Geometry(_) | Wkb(_) | LargeWkb(_) | WkbView(_) | Wkt(_) | LargeWkt(_) | WktView(_) => {
111                None
112            }
113        }
114    }
115
116    /// Returns the [Metadata] contained within this type.
117    pub fn metadata(&self) -> &Arc<Metadata> {
118        use GeoArrowType::*;
119        match self {
120            Point(t) => t.metadata(),
121            LineString(t) => t.metadata(),
122            Polygon(t) => t.metadata(),
123            MultiPoint(t) => t.metadata(),
124            MultiLineString(t) => t.metadata(),
125            MultiPolygon(t) => t.metadata(),
126            GeometryCollection(t) => t.metadata(),
127            Rect(t) => t.metadata(),
128            Geometry(t) => t.metadata(),
129            Wkb(t) | LargeWkb(t) | WkbView(t) => t.metadata(),
130            Wkt(t) | LargeWkt(t) | WktView(t) => t.metadata(),
131        }
132    }
133    /// Converts a [`GeoArrowType`] into the relevant arrow [`DataType`].
134    ///
135    /// Note that an arrow [`DataType`] will lose the accompanying GeoArrow metadata if it is not
136    /// part of a [`Field`] with GeoArrow extension metadata in its field metadata.
137    ///
138    /// # Examples
139    ///
140    /// ```
141    /// # use arrow_schema::DataType;
142    /// # use geoarrow_schema::{Dimension, GeoArrowType, PointType};
143    /// #
144    /// let point_type = PointType::new(Dimension::XY, Default::default());
145    /// let data_type = GeoArrowType::Point(point_type).to_data_type();
146    /// assert!(matches!(data_type, DataType::Struct(_)));
147    /// ```
148    pub fn to_data_type(&self) -> DataType {
149        use GeoArrowType::*;
150        match self {
151            Point(t) => t.data_type(),
152            LineString(t) => t.data_type(),
153            Polygon(t) => t.data_type(),
154            MultiPoint(t) => t.data_type(),
155            MultiLineString(t) => t.data_type(),
156            MultiPolygon(t) => t.data_type(),
157            GeometryCollection(t) => t.data_type(),
158            Rect(t) => t.data_type(),
159            Geometry(t) => t.data_type(),
160            Wkb(_) => DataType::Binary,
161            LargeWkb(_) => DataType::LargeBinary,
162            WkbView(_) => DataType::BinaryView,
163            Wkt(_) => DataType::Utf8,
164            LargeWkt(_) => DataType::LargeUtf8,
165            WktView(_) => DataType::Utf8View,
166        }
167    }
168
169    /// Converts this [`GeoArrowType`] into an arrow [`Field`], maintaining GeoArrow extension
170    /// metadata.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// # use geoarrow_schema::{Dimension, GeoArrowType, PointType};
176    /// #
177    /// let point_type = PointType::new(Dimension::XY, Default::default());
178    /// let geoarrow_type = GeoArrowType::Point(point_type);
179    /// let field = geoarrow_type.to_field("geometry", true);
180    /// assert_eq!(field.name(), "geometry");
181    /// assert!(field.is_nullable());
182    /// assert_eq!(field.metadata()["ARROW:extension:name"], "geoarrow.point");
183    /// ```
184    pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
185        use GeoArrowType::*;
186        match self {
187            Point(t) => t.to_field(name, nullable),
188            LineString(t) => t.to_field(name, nullable),
189            Polygon(t) => t.to_field(name, nullable),
190            MultiPoint(t) => t.to_field(name, nullable),
191            MultiLineString(t) => t.to_field(name, nullable),
192            MultiPolygon(t) => t.to_field(name, nullable),
193            GeometryCollection(t) => t.to_field(name, nullable),
194            Rect(t) => t.to_field(name, nullable),
195            Geometry(t) => t.to_field(name, nullable),
196            Wkb(t) | LargeWkb(t) | WkbView(t) => {
197                Field::new(name, self.to_data_type(), nullable).with_extension_type(t.clone())
198            }
199            Wkt(t) | LargeWkt(t) | WktView(t) => {
200                Field::new(name, self.to_data_type(), nullable).with_extension_type(t.clone())
201            }
202        }
203    }
204
205    /// Applies the provided [CoordType] onto self.
206    ///
207    /// [`Rect`][Self::Rect] and WKB and WKT variants will return the same type as they do not have
208    /// a parameterized coordinate types.
209    ///
210    /// # Examples
211    ///
212    /// ```
213    /// # use geoarrow_schema::{CoordType, Dimension, GeoArrowType, PointType};
214    /// #
215    /// let point_type = PointType::new(Dimension::XY, Default::default());
216    /// let geoarrow_type = GeoArrowType::Point(point_type);
217    /// let new_type = geoarrow_type.with_coord_type(CoordType::Separated);
218    ///
219    /// assert_eq!(new_type.coord_type(), Some(CoordType::Separated));
220    /// ```
221    pub fn with_coord_type(self, coord_type: CoordType) -> GeoArrowType {
222        use GeoArrowType::*;
223        match self {
224            Point(t) => Point(t.with_coord_type(coord_type)),
225            LineString(t) => LineString(t.with_coord_type(coord_type)),
226            Polygon(t) => Polygon(t.with_coord_type(coord_type)),
227            MultiPoint(t) => MultiPoint(t.with_coord_type(coord_type)),
228            MultiLineString(t) => MultiLineString(t.with_coord_type(coord_type)),
229            MultiPolygon(t) => MultiPolygon(t.with_coord_type(coord_type)),
230            GeometryCollection(t) => GeometryCollection(t.with_coord_type(coord_type)),
231            Rect(t) => Rect(t),
232            Geometry(t) => Geometry(t.with_coord_type(coord_type)),
233            _ => self,
234        }
235    }
236
237    /// Applies the provided [Dimension] onto self.
238    ///
239    /// [`Geometry`][Self::Geometry] and WKB and WKT variants will return the same type as they do
240    /// not have a parameterized dimension.
241    ///
242    /// # Examples
243    ///
244    /// ```
245    /// # use geoarrow_schema::{Dimension, GeoArrowType, PointType};
246    /// #
247    /// let point_type = PointType::new(Dimension::XY, Default::default());
248    /// let geoarrow_type = GeoArrowType::Point(point_type);
249    /// let new_type = geoarrow_type.with_dimension(Dimension::XYZ);
250    ///
251    /// assert_eq!(new_type.dimension(), Some(Dimension::XYZ));
252    /// ```
253    pub fn with_dimension(self, dim: Dimension) -> GeoArrowType {
254        use GeoArrowType::*;
255        match self {
256            Point(t) => Point(t.with_dimension(dim)),
257            LineString(t) => LineString(t.with_dimension(dim)),
258            Polygon(t) => Polygon(t.with_dimension(dim)),
259            MultiPoint(t) => MultiPoint(t.with_dimension(dim)),
260            MultiLineString(t) => MultiLineString(t.with_dimension(dim)),
261            MultiPolygon(t) => MultiPolygon(t.with_dimension(dim)),
262            GeometryCollection(t) => GeometryCollection(t.with_dimension(dim)),
263            Rect(t) => Rect(t.with_dimension(dim)),
264            Geometry(t) => Geometry(t),
265            _ => self,
266        }
267    }
268
269    /// Applies the provided [Metadata] onto self.
270    pub fn with_metadata(self, meta: Arc<Metadata>) -> GeoArrowType {
271        use GeoArrowType::*;
272        match self {
273            Point(t) => Point(t.with_metadata(meta)),
274            LineString(t) => LineString(t.with_metadata(meta)),
275            Polygon(t) => Polygon(t.with_metadata(meta)),
276            MultiPoint(t) => MultiPoint(t.with_metadata(meta)),
277            MultiLineString(t) => MultiLineString(t.with_metadata(meta)),
278            MultiPolygon(t) => MultiPolygon(t.with_metadata(meta)),
279            GeometryCollection(t) => GeometryCollection(t.with_metadata(meta)),
280            Rect(t) => Rect(t.with_metadata(meta)),
281            Geometry(t) => Geometry(t.with_metadata(meta)),
282            Wkb(t) => Wkb(t.with_metadata(meta)),
283            LargeWkb(t) => LargeWkb(t.with_metadata(meta)),
284            WkbView(t) => WkbView(t.with_metadata(meta)),
285            Wkt(t) => Wkt(t.with_metadata(meta)),
286            LargeWkt(t) => LargeWkt(t.with_metadata(meta)),
287            WktView(t) => WktView(t.with_metadata(meta)),
288        }
289    }
290
291    /// Create a new [`GeoArrowType`] from an Arrow [`Field`], requiring GeoArrow metadata to be
292    /// set.
293    ///
294    /// If the field does not have at least a GeoArrow extension name, an error will be returned.
295    ///
296    /// See also [`GeoArrowType::from_arrow_field`].
297    pub fn from_extension_field(field: &Field) -> GeoArrowResult<Self> {
298        let extension_name = field.extension_type_name().ok_or(GeoArrowError::InvalidGeoArrow(
299                "Expected GeoArrow extension metadata, but found none, and `require_geoarrow_metadata` is `true`.".to_string(),
300            ))?;
301
302        use GeoArrowType::*;
303        let data_type = match extension_name {
304            PointType::NAME => Point(field.try_extension_type()?),
305            LineStringType::NAME => LineString(field.try_extension_type()?),
306            PolygonType::NAME => Polygon(field.try_extension_type()?),
307            MultiPointType::NAME => MultiPoint(field.try_extension_type()?),
308            MultiLineStringType::NAME => MultiLineString(field.try_extension_type()?),
309            MultiPolygonType::NAME => MultiPolygon(field.try_extension_type()?),
310            GeometryCollectionType::NAME => GeometryCollection(field.try_extension_type()?),
311            BoxType::NAME => Rect(field.try_extension_type()?),
312            GeometryType::NAME => Geometry(field.try_extension_type()?),
313            WkbType::NAME => match field.data_type() {
314                DataType::Binary => Wkb(field.try_extension_type()?),
315                DataType::LargeBinary => LargeWkb(field.try_extension_type()?),
316                DataType::BinaryView => WkbView(field.try_extension_type()?),
317                _ => {
318                    return Err(GeoArrowError::InvalidGeoArrow(format!(
319                        "Expected binary type for a field with extension name 'geoarrow.wkb', got '{}'",
320                        field.data_type()
321                    )));
322                }
323            },
324            WktType::NAME => match field.data_type() {
325                DataType::Utf8 => Wkt(field.try_extension_type()?),
326                DataType::LargeUtf8 => LargeWkt(field.try_extension_type()?),
327                DataType::Utf8View => WktView(field.try_extension_type()?),
328                _ => {
329                    return Err(GeoArrowError::InvalidGeoArrow(format!(
330                        "Expected string type for a field with extension name 'geoarrow.wkt', got '{}'",
331                        field.data_type()
332                    )));
333                }
334            },
335            name => {
336                return Err(GeoArrowError::InvalidGeoArrow(format!(
337                    "Expected a GeoArrow extension name, got an Arrow extension type with name: '{name}'.",
338                )));
339            }
340        };
341        Ok(data_type)
342    }
343
344    /// Create a new [`GeoArrowType`] from an Arrow [`Field`], inferring the GeoArrow type if
345    /// GeoArrow metadata is not present.
346    ///
347    /// This will first try [`GeoArrowType::from_extension_field`], and if that fails, will try to
348    /// infer the GeoArrow type from the field's [DataType]. This only works for Point, WKB, and
349    /// WKT types, as those are the only types that can be unambiguously inferred from an Arrow
350    /// [DataType].
351    pub fn from_arrow_field(field: &Field) -> GeoArrowResult<Self> {
352        use GeoArrowType::*;
353        if let Ok(geo_type) = Self::from_extension_field(field) {
354            Ok(geo_type)
355        } else {
356            let metadata = Arc::new(Metadata::try_from(field)?);
357            let data_type = match field.data_type() {
358                DataType::Struct(struct_fields) => {
359                    if !struct_fields.iter().all(|f| matches!(f.data_type(), DataType::Float64) ) {
360                        return Err(GeoArrowError::InvalidGeoArrow("all struct fields must be Float64 when inferring point type.".to_string()));
361                    }
362
363                    match struct_fields.len() {
364                        2 => GeoArrowType::Point(PointType::new( Dimension::XY, metadata).with_coord_type(CoordType::Separated)),
365                        3 => GeoArrowType::Point(PointType::new( Dimension::XYZ, metadata).with_coord_type(CoordType::Separated)),
366                        4 => GeoArrowType::Point(PointType::new( Dimension::XYZM, metadata).with_coord_type(CoordType::Separated)),
367                        l => return Err(GeoArrowError::InvalidGeoArrow(format!("invalid number of struct fields: {l}"))),
368                    }
369                },
370                DataType::FixedSizeList(inner_field, list_size) => {
371                    if !matches!(inner_field.data_type(), DataType::Float64 )  {
372                        return Err(GeoArrowError::InvalidGeoArrow(format!("invalid inner field type of fixed size list: {}", inner_field.data_type())));
373                    }
374
375                    match list_size {
376                        2 => GeoArrowType::Point(PointType::new(Dimension::XY, metadata).with_coord_type(CoordType::Interleaved)),
377                        3 => GeoArrowType::Point(PointType::new(Dimension::XYZ, metadata).with_coord_type(CoordType::Interleaved)),
378                        4 => GeoArrowType::Point(PointType::new(Dimension::XYZM, metadata).with_coord_type(CoordType::Interleaved)),
379                        _ => return Err(GeoArrowError::InvalidGeoArrow(format!("invalid list_size: {list_size}"))),
380                    }
381                },
382                DataType::Binary => Wkb(WkbType::new(metadata)),
383                DataType::LargeBinary => LargeWkb(WkbType::new(metadata)),
384                DataType::BinaryView => WkbView(WkbType::new(metadata)),
385                DataType::Utf8 => Wkt(WktType::new(metadata)),
386                DataType::LargeUtf8 => LargeWkt(WktType::new(metadata)),
387                DataType::Utf8View => WktView(WktType::new(metadata)),
388                _ => return Err(GeoArrowError::InvalidGeoArrow("Only FixedSizeList, Struct, Binary, LargeBinary, BinaryView, String, LargeString, and StringView arrays are unambigously typed for a GeoArrow type and can be used without extension metadata.\nEnsure your array input has GeoArrow metadata.".to_string())),
389            };
390
391            Ok(data_type)
392        }
393    }
394}
395
396macro_rules! impl_into_geoarrowtype {
397    ($source_type:ident, $variant:expr) => {
398        impl From<$source_type> for GeoArrowType {
399            fn from(value: $source_type) -> Self {
400                $variant(value)
401            }
402        }
403    };
404}
405
406impl_into_geoarrowtype!(PointType, GeoArrowType::Point);
407impl_into_geoarrowtype!(LineStringType, GeoArrowType::LineString);
408impl_into_geoarrowtype!(PolygonType, GeoArrowType::Polygon);
409impl_into_geoarrowtype!(MultiPointType, GeoArrowType::MultiPoint);
410impl_into_geoarrowtype!(MultiLineStringType, GeoArrowType::MultiLineString);
411impl_into_geoarrowtype!(MultiPolygonType, GeoArrowType::MultiPolygon);
412impl_into_geoarrowtype!(GeometryCollectionType, GeoArrowType::GeometryCollection);
413impl_into_geoarrowtype!(BoxType, GeoArrowType::Rect);
414impl_into_geoarrowtype!(GeometryType, GeoArrowType::Geometry);
415
416impl TryFrom<&Field> for GeoArrowType {
417    type Error = GeoArrowError;
418
419    fn try_from(field: &Field) -> GeoArrowResult<Self> {
420        Self::from_extension_field(field)
421    }
422}