geoarrow_array/
trait_.rs

1use std::any::Any;
2use std::fmt::Debug;
3use std::sync::Arc;
4
5use arrow_array::{Array, ArrayRef};
6use arrow_buffer::NullBuffer;
7use arrow_schema::extension::ExtensionType;
8use geo_traits::GeometryTrait;
9use geoarrow_schema::error::GeoArrowResult;
10use geoarrow_schema::{GeoArrowType, Metadata};
11
12/// Convert GeoArrow arrays into their respective [arrow][arrow_array] arrays.
13pub trait IntoArrow {
14    /// The type of arrow array that this geoarrow array can be converted into.
15    type ArrowArray: Array;
16
17    /// The extension type representing this array. It will always be a type defined by
18    /// [geoarrow_schema].
19    type ExtensionType: ExtensionType;
20
21    /// Converts this geoarrow array into an arrow array.
22    ///
23    /// Note that [arrow][arrow_array] arrays do not maintain Arrow extension metadata, so the
24    /// result of this method will omit any spatial extension information. Ensure you call
25    /// [Self::extension_type] to get extension information that you can add to a
26    /// [`Field`][arrow_schema::Field].
27    fn into_arrow(self) -> Self::ArrowArray;
28
29    /// Return the Arrow extension type representing this array.
30    fn extension_type(&self) -> &Self::ExtensionType;
31}
32
33/// A base trait for all GeoArrow arrays.
34///
35/// This is a geospatial corollary to the upstream [`Array`] trait.
36pub trait GeoArrowArray: Debug + Send + Sync {
37    /// Returns the array as [`Any`] so that it can be downcasted to a specific implementation.
38    ///
39    /// Prefer using [`AsGeoArrowArray`][crate::cast::AsGeoArrowArray] instead of calling this
40    /// method and manually downcasting.
41    fn as_any(&self) -> &dyn Any;
42
43    /// Returns the [`GeoArrowType`] of this array.
44    ///
45    /// # Examples
46    ///
47    /// ```
48    /// # use geoarrow_array::builder::PointBuilder;
49    /// # use geoarrow_array::GeoArrowArray;
50    /// # use geoarrow_schema::{Dimension, PointType, GeoArrowType};
51    /// #
52    /// let point = geo_types::point!(x: 1., y: 2.);
53    /// let point_type = PointType::new(Dimension::XY, Default::default());
54    /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
55    /// assert_eq!(point_array.data_type(), GeoArrowType::Point(point_type));
56    /// ```
57    fn data_type(&self) -> GeoArrowType;
58
59    /// Converts this array into an `Arc`ed [`arrow`][arrow_array] array, consuming the original
60    /// array.
61    ///
62    /// This is `O(1)`.
63    ///
64    /// Note that **this will omit any spatial extension information**. You must separately store
65    /// the spatial information in a [`Field`][arrow_schema::Field] derived from
66    /// [`Self::data_type`].
67    ///
68    /// # Examples
69    ///
70    /// ```
71    /// # use arrow_array::ArrayRef;
72    /// # use geoarrow_array::builder::PointBuilder;
73    /// # use geoarrow_array::GeoArrowArray;
74    /// # use geoarrow_schema::{Dimension, PointType};
75    /// #
76    /// let point = geo_types::point!(x: 1., y: 2.);
77    /// let point_type = PointType::new(Dimension::XY, Default::default());
78    /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
79    /// let array_ref: ArrayRef = point_array.into_array_ref();
80    /// ```
81    #[must_use]
82    fn into_array_ref(self) -> ArrayRef;
83
84    /// Converts this array into an `Arc`ed [`arrow`][arrow_array] array.
85    ///
86    /// This is `O(1)`.
87    ///
88    /// Note that **this will omit any spatial extension information**. You must separately store
89    /// the spatial information in a [`Field`][arrow_schema::Field] derived from
90    /// [`Self::data_type`].
91    ///
92    /// # Examples
93    ///
94    /// ```
95    /// # use arrow_array::ArrayRef;
96    /// # use geoarrow_array::builder::PointBuilder;
97    /// # use geoarrow_array::GeoArrowArray;
98    /// # use geoarrow_schema::{Dimension, PointType};
99    /// #
100    /// let point = geo_types::point!(x: 1., y: 2.);
101    /// let point_type = PointType::new(Dimension::XY, Default::default());
102    /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
103    /// let array_ref: ArrayRef = point_array.to_array_ref();
104    /// ```
105    #[must_use]
106    fn to_array_ref(&self) -> ArrayRef;
107
108    /// The number of geometries contained in this array.
109    ///
110    /// # Examples
111    ///
112    /// ```
113    /// # use arrow_array::ArrayRef;
114    /// # use geoarrow_array::builder::PointBuilder;
115    /// # use geoarrow_array::GeoArrowArray;
116    /// # use geoarrow_schema::{Dimension, PointType};
117    /// #
118    /// let point = geo_types::point!(x: 1., y: 2.);
119    /// let point_type = PointType::new(Dimension::XY, Default::default());
120    /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
121    /// assert_eq!(point_array.len(), 1);
122    /// ```
123    fn len(&self) -> usize;
124
125    /// Returns `true` if the array is empty.
126    ///
127    /// # Examples
128    ///
129    /// ```
130    /// # use arrow_array::ArrayRef;
131    /// # use geoarrow_array::builder::PointBuilder;
132    /// # use geoarrow_array::GeoArrowArray;
133    /// # use geoarrow_schema::{Dimension, PointType};
134    /// #
135    /// let point = geo_types::point!(x: 1., y: 2.);
136    /// let point_type = PointType::new(Dimension::XY, Default::default());
137    /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
138    /// assert!(!point_array.is_empty());
139    /// ```
140    fn is_empty(&self) -> bool {
141        self.len() == 0
142    }
143
144    /// Returns a potentially computed [`NullBuffer``] that represents the logical null values of
145    /// this array, if any.
146    ///
147    /// Logical nulls represent the values that are null in the array, regardless of the underlying
148    /// physical arrow representation.
149    ///
150    /// For most array types, this is equivalent to the "physical" nulls returned by
151    /// [`Array::nulls`]. However it is different for union arrays, including our
152    /// [`GeometryArray`][crate::array::GeometryArray] and
153    /// [`GeometryCollectionArray`][crate::array::GeometryCollectionArray] types, because the
154    /// unions aren't encoded in a single null buffer.
155    fn logical_nulls(&self) -> Option<NullBuffer>;
156
157    /// Returns the number of null slots in this array.
158    ///
159    /// This is `O(1)` since the number of null elements is pre-computed.
160    ///
161    /// # Examples
162    ///
163    /// ```
164    /// # use geoarrow_array::GeoArrowArray;
165    /// # use geoarrow_array::builder::PointBuilder;
166    /// # use geoarrow_schema::{Dimension, PointType};
167    /// #
168    /// let point = geo_types::point!(x: 1., y: 2.);
169    /// let point_type = PointType::new(Dimension::XY, Default::default());
170    /// let point_array =
171    ///     PointBuilder::from_nullable_points([Some(&point), None].into_iter(), point_type.clone()).finish();
172    /// assert_eq!(point_array.logical_null_count(), 1);
173    /// ```
174    fn logical_null_count(&self) -> usize;
175
176    /// Returns whether slot `i` is null.
177    ///
178    /// # Examples
179    ///
180    /// ```
181    /// # use geoarrow_array::GeoArrowArray;
182    /// # use geoarrow_array::builder::PointBuilder;
183    /// # use geoarrow_schema::{Dimension, PointType};
184    /// #
185    /// let point = geo_types::point!(x: 1., y: 2.);
186    ///
187    /// let point_type = PointType::new(Dimension::XY, Default::default());
188    /// let point_array =
189    ///     PointBuilder::from_nullable_points([Some(&point), None].into_iter(), point_type.clone()).finish();
190    /// assert!(point_array.is_null(1));
191    /// ```
192    ///
193    /// # Panics
194    ///
195    /// Panics iff `i >= self.len()`.
196    fn is_null(&self, i: usize) -> bool;
197
198    /// Returns whether slot `i` is valid.
199    ///
200    /// # Examples
201    ///
202    /// ```
203    /// # use geoarrow_array::GeoArrowArray;
204    /// # use geoarrow_array::builder::PointBuilder;
205    /// # use geoarrow_schema::{Dimension, PointType};
206    /// #
207    /// let point = geo_types::point!(x: 1., y: 2.);
208    ///
209    /// let point_type = PointType::new(Dimension::XY, Default::default());
210    /// let point_array =
211    ///     PointBuilder::from_nullable_points([Some(&point), None].into_iter(), point_type.clone()).finish();
212    /// assert!(point_array.is_valid(0));
213    /// ```
214    ///
215    /// # Panics
216    ///
217    /// Panics iff `i >= self.len()`.
218    #[inline]
219    fn is_valid(&self, i: usize) -> bool {
220        !self.is_null(i)
221    }
222
223    /// Returns a zero-copy slice of this array with the indicated offset and length.
224    ///
225    /// # Examples
226    ///
227    /// ```
228    /// # use std::sync::Arc;
229    /// #
230    /// # use geoarrow_array::GeoArrowArray;
231    /// # use geoarrow_array::builder::PointBuilder;
232    /// # use geoarrow_schema::{Dimension, PointType};
233    /// #
234    /// let point1 = geo_types::point!(x: 1., y: 2.);
235    /// let point2 = geo_types::point!(x: 3., y: 4.);
236    ///
237    /// let point_type = PointType::new(Dimension::XY, Default::default());
238    /// let point_array =
239    ///     Arc::new(PointBuilder::from_points([point1, point2].iter(), point_type.clone()).finish())
240    ///         as Arc<dyn GeoArrowArray>;
241    /// let sliced_array = point_array.slice(1, 1);
242    /// assert_eq!(sliced_array.len(), 1);
243    /// ```
244    ///
245    /// # Panics
246    ///
247    /// This function panics iff `offset + length > self.len()`.
248    #[must_use]
249    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray>;
250
251    /// Change the [`Metadata`] of this array.
252    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray>;
253}
254
255/// A trait for accessing the values of a [`GeoArrowArray`].
256///
257/// # Performance
258///
259/// Accessing a geometry from a "native" array, such as `PointArray`, `MultiPolygonArray` or
260/// `GeometryArray` will always be constant-time and zero-copy.
261///
262/// Accessing a geometry from a "serialized" array such as `GenericWkbArray` or `GenericWktArray`
263/// will trigger some amount of parsing. In the case of `GenericWkbArray`, accessing an item will
264/// read the WKB header and scan the buffer if needed to find internal geometry offsets, but will
265/// not copy any internal coordinates. This allows for later access to be constant-time (though not
266/// necessarily zero-copy, since WKB is not byte-aligned). In the case of `GenericWktArray`,
267/// accessing a geometry will fully parse the WKT string and copy coordinates to a separate
268/// representation. This means that calling `.iter()` on a `GenericWktArray` will transparently
269/// fully parse every row.
270///
271/// # Validity
272///
273/// A [`GeoArrowArrayAccessor`] must always return a well-defined value for an index that is
274/// within the bounds `0..Array::len`, including for null indexes where [`Array::is_null`] is true.
275///
276/// The value at null indexes is unspecified, and implementations must not rely on a specific
277/// value such as [`Default::default`] being returned, however, it must not be undefined.
278pub trait GeoArrowArrayAccessor<'a>: GeoArrowArray {
279    /// The [geoarrow scalar object][crate::scalar] for this geometry array type.
280    type Item: Send + Sync + GeometryTrait<T = f64>;
281
282    /// Returns the element at index `i`, not considering validity.
283    ///
284    /// # Examples
285    ///
286    /// ```
287    /// use geo_traits::{CoordTrait, PointTrait};
288    /// # use geoarrow_array::GeoArrowArrayAccessor;
289    /// # use geoarrow_array::builder::PointBuilder;
290    /// # use geoarrow_schema::{Dimension, PointType};
291    ///
292    /// let point1 = geo_types::point!(x: 1., y: 2.);
293    ///
294    /// let point_type = PointType::new(Dimension::XY, Default::default());
295    /// let point_array =
296    ///     PointBuilder::from_nullable_points([Some(&point1), None].into_iter(), point_type.clone())
297    ///         .finish();
298    ///
299    /// let coord = point_array.value(0).unwrap().coord().unwrap();
300    /// assert_eq!(coord.x(), 1.);
301    /// assert_eq!(coord.y(), 2.);
302    /// ```
303    ///
304    /// # Errors
305    ///
306    /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
307    ///
308    /// # Panics
309    ///
310    /// Panics if the value is outside the bounds of the array.
311    fn value(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
312        assert!(index <= self.len());
313        unsafe { self.value_unchecked(index) }
314    }
315
316    /// Returns the element at index `i`, not considering validity.
317    ///
318    /// # Examples
319    ///
320    /// ```
321    /// use geo_traits::{CoordTrait, PointTrait};
322    /// # use geoarrow_array::GeoArrowArrayAccessor;
323    /// # use geoarrow_array::builder::PointBuilder;
324    /// # use geoarrow_schema::{Dimension, PointType};
325    ///
326    /// let point1 = geo_types::point!(x: 1., y: 2.);
327    ///
328    /// let point_type = PointType::new(Dimension::XY, Default::default());
329    /// let point_array =
330    ///     PointBuilder::from_nullable_points([Some(&point1), None].into_iter(), point_type.clone())
331    ///         .finish();
332    ///
333    /// let coord = unsafe { point_array.value_unchecked(0) }
334    ///     .unwrap()
335    ///     .coord()
336    ///     .unwrap();
337    /// assert_eq!(coord.x(), 1.);
338    /// assert_eq!(coord.y(), 2.);
339    /// ```
340    ///
341    /// # Errors
342    ///
343    /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
344    ///
345    /// # Safety
346    ///
347    /// Caller is responsible for ensuring that the index is within the bounds of the array
348    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item>;
349
350    /// Returns the value at slot `i` as an Arrow scalar, considering validity.
351    ///
352    /// # Examples
353    ///
354    /// ```
355    /// # use geoarrow_array::GeoArrowArrayAccessor;
356    /// # use geoarrow_array::builder::PointBuilder;
357    /// # use geoarrow_schema::{Dimension, PointType};
358    /// #
359    /// let point1 = geo_types::point!(x: 1., y: 2.);
360    ///
361    /// let point_type = PointType::new(Dimension::XY, Default::default());
362    /// let point_array =
363    ///     PointBuilder::from_nullable_points([Some(&point1), None].into_iter(), point_type.clone())
364    ///         .finish();
365    ///
366    /// assert!(point_array.get(0).unwrap().is_some());
367    /// assert!(point_array.get(1).unwrap().is_none());
368    /// ```
369    ///
370    /// # Errors
371    ///
372    /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
373    fn get(&'a self, index: usize) -> GeoArrowResult<Option<Self::Item>> {
374        if self.is_null(index) {
375            return Ok(None);
376        }
377
378        Ok(Some(self.value(index)?))
379    }
380
381    /// Returns the value at slot `i` as an Arrow scalar, considering validity.
382    ///
383    /// # Errors
384    ///
385    /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
386    ///
387    /// # Safety
388    ///
389    /// Caller is responsible for ensuring that the index is within the bounds of the array
390    unsafe fn get_unchecked(&'a self, index: usize) -> Option<GeoArrowResult<Self::Item>> {
391        if self.is_null(index) {
392            return None;
393        }
394
395        Some(unsafe { self.value_unchecked(index) })
396    }
397
398    /// Iterates over this array's geoarrow scalar values, considering validity.
399    ///
400    /// # Errors
401    ///
402    /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
403    fn iter(&'a self) -> impl ExactSizeIterator<Item = Option<GeoArrowResult<Self::Item>>> + 'a {
404        (0..self.len()).map(|i| unsafe { self.get_unchecked(i) })
405    }
406
407    /// Iterator over geoarrow scalar values, not considering validity.
408    ///
409    /// # Errors
410    ///
411    /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
412    fn iter_values(&'a self) -> impl ExactSizeIterator<Item = GeoArrowResult<Self::Item>> + 'a {
413        (0..self.len()).map(|i| unsafe { self.value_unchecked(i) })
414    }
415}
416
417/// A trait describing a mutable geometry array; i.e. an array whose values can be changed.
418///
419// Note: This trait is not yet publicly exported from this crate, as we're not sure how the API
420// should be, and in particular whether we need this trait to be dyn-compatible or not.
421pub(crate) trait GeoArrowArrayBuilder: Debug + Send + Sync {
422    /// Returns the length of the array.
423    fn len(&self) -> usize;
424
425    /// Returns whether the array is empty.
426    fn is_empty(&self) -> bool {
427        self.len() == 0
428    }
429
430    /// Push a null value to this builder.
431    fn push_null(&mut self);
432
433    /// Push a geometry to this builder.
434    #[allow(dead_code)]
435    fn push_geometry(
436        &mut self,
437        geometry: Option<&impl GeometryTrait<T = f64>>,
438    ) -> GeoArrowResult<()>;
439
440    /// Finish the builder and return an [`Arc`] to the resulting array.
441    #[allow(dead_code)]
442    fn finish(self) -> Arc<dyn GeoArrowArray>;
443}
444
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::Array;
    use arrow_array::builder::{ArrayBuilder, FixedSizeListBuilder, Float64Builder, StructBuilder};
    use arrow_schema::{DataType, Field};
    use geoarrow_schema::{CoordType, Dimension, GeometryType, PointType};

    use super::*;
    use crate::builder::GeometryBuilder;
    use crate::trait_::GeoArrowArray;

    /// A fixed-size list of `Float64` with 2, 3 or 4 entries is inferred as an interleaved
    /// point type of the matching dimension.
    #[test]
    fn infer_type_interleaved_point() {
        let cases = [
            (2, Dimension::XY),
            (3, Dimension::XYZ),
            (4, Dimension::XYZM),
        ];
        for (list_size, dim) in cases {
            let array = FixedSizeListBuilder::new(Float64Builder::new(), list_size).finish();
            let field = Field::new("", array.data_type().clone(), true);
            let inferred = GeoArrowType::try_from(&field).unwrap();
            let expected = GeoArrowType::Point(
                PointType::new(dim, Default::default()).with_coord_type(CoordType::Interleaved),
            );
            assert_eq!(inferred, expected);
        }
    }

    /// A struct of nullable `Float64` children named `x`, `y`, (`z`, (`m`)) is inferred as a
    /// separated point type of the matching dimension.
    #[test]
    fn infer_type_separated_point() {
        let cases = [
            (&["x", "y"][..], Dimension::XY),
            (&["x", "y", "z"][..], Dimension::XYZ),
            (&["x", "y", "z", "m"][..], Dimension::XYZM),
        ];
        for (names, dim) in cases {
            // One nullable Float64 field, and one Float64 builder, per coordinate name.
            let fields: Vec<Arc<Field>> = names
                .iter()
                .map(|name| Arc::new(Field::new(*name, DataType::Float64, true)))
                .collect();
            let builders: Vec<Box<dyn ArrayBuilder>> = names
                .iter()
                .map(|_| Box::new(Float64Builder::new()) as Box<dyn ArrayBuilder>)
                .collect();

            let array = StructBuilder::new(fields, builders).finish();
            let field = Field::new("", array.data_type().clone(), true);
            let inferred = GeoArrowType::try_from(&field).unwrap();
            let expected = GeoArrowType::Point(
                PointType::new(dim, Default::default()).with_coord_type(CoordType::Separated),
            );
            assert_eq!(inferred, expected);
        }
    }

    /// Converting an array's type to a `Field` and back yields the type reported by
    /// `GeoArrowArray::data_type`.
    #[test]
    fn native_type_round_trip() {
        let points = crate::test::point::point_array(CoordType::Interleaved);
        let points_field = points.data_type.to_field("geometry", true);
        let points_type = GeoArrowType::try_from(&points_field).unwrap();
        assert_eq!(points.data_type(), points_type);

        let multi_lines = crate::test::multilinestring::ml_array(CoordType::Interleaved);
        let multi_lines_field = multi_lines.data_type.to_field("geometry", true);
        let multi_lines_type = GeoArrowType::try_from(&multi_lines_field).unwrap();
        assert_eq!(multi_lines.data_type(), multi_lines_type);

        let geometry_type =
            GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved);
        let mut builder = GeometryBuilder::new(geometry_type);
        builder
            .push_geometry(Some(&crate::test::point::p0()))
            .unwrap();
        builder
            .push_geometry(Some(&crate::test::point::p1()))
            .unwrap();
        builder
            .push_geometry(Some(&crate::test::point::p2()))
            .unwrap();
        builder
            .push_geometry(Some(&crate::test::multilinestring::ml0()))
            .unwrap();
        builder
            .push_geometry(Some(&crate::test::multilinestring::ml1()))
            .unwrap();

        let geometries = builder.finish();
        let geometries_field = geometries.data_type.to_field("geometry", true);
        let geometries_type = GeoArrowType::try_from(&geometries_field).unwrap();
        assert_eq!(geometries.data_type(), geometries_type);
    }
}