geoarrow_array/trait_.rs
1use std::any::Any;
2use std::fmt::Debug;
3use std::sync::Arc;
4
5use arrow_array::{Array, ArrayRef};
6use arrow_buffer::NullBuffer;
7use arrow_schema::extension::ExtensionType;
8use geo_traits::GeometryTrait;
9use geoarrow_schema::error::GeoArrowResult;
10use geoarrow_schema::{GeoArrowType, Metadata};
11
12/// Convert GeoArrow arrays into their respective [arrow][arrow_array] arrays.
13pub trait IntoArrow {
14 /// The type of arrow array that this geoarrow array can be converted into.
15 type ArrowArray: Array;
16
17 /// The extension type representing this array. It will always be a type defined by
18 /// [geoarrow_schema].
19 type ExtensionType: ExtensionType;
20
21 /// Converts this geoarrow array into an arrow array.
22 ///
23 /// Note that [arrow][arrow_array] arrays do not maintain Arrow extension metadata, so the
24 /// result of this method will omit any spatial extension information. Ensure you call
25 /// [Self::extension_type] to get extension information that you can add to a
26 /// [`Field`][arrow_schema::Field].
27 fn into_arrow(self) -> Self::ArrowArray;
28
29 /// Return the Arrow extension type representing this array.
30 fn extension_type(&self) -> &Self::ExtensionType;
31}
32
33/// A base trait for all GeoArrow arrays.
34///
35/// This is a geospatial corollary to the upstream [`Array`] trait.
36pub trait GeoArrowArray: Debug + Send + Sync {
37 /// Returns the array as [`Any`] so that it can be downcasted to a specific implementation.
38 ///
39 /// Prefer using [`AsGeoArrowArray`][crate::cast::AsGeoArrowArray] instead of calling this
40 /// method and manually downcasting.
41 fn as_any(&self) -> &dyn Any;
42
43 /// Returns the [`GeoArrowType`] of this array.
44 ///
45 /// # Examples
46 ///
47 /// ```
48 /// # use geoarrow_array::builder::PointBuilder;
49 /// # use geoarrow_array::GeoArrowArray;
50 /// # use geoarrow_schema::{Dimension, PointType, GeoArrowType};
51 /// #
52 /// let point = geo_types::point!(x: 1., y: 2.);
53 /// let point_type = PointType::new(Dimension::XY, Default::default());
54 /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
55 /// assert_eq!(point_array.data_type(), GeoArrowType::Point(point_type));
56 /// ```
57 fn data_type(&self) -> GeoArrowType;
58
59 /// Converts this array into an `Arc`ed [`arrow`][arrow_array] array, consuming the original
60 /// array.
61 ///
62 /// This is `O(1)`.
63 ///
64 /// Note that **this will omit any spatial extension information**. You must separately store
65 /// the spatial information in a [`Field`][arrow_schema::Field] derived from
66 /// [`Self::data_type`].
67 ///
68 /// # Examples
69 ///
70 /// ```
71 /// # use arrow_array::ArrayRef;
72 /// # use geoarrow_array::builder::PointBuilder;
73 /// # use geoarrow_array::GeoArrowArray;
74 /// # use geoarrow_schema::{Dimension, PointType};
75 /// #
76 /// let point = geo_types::point!(x: 1., y: 2.);
77 /// let point_type = PointType::new(Dimension::XY, Default::default());
78 /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
79 /// let array_ref: ArrayRef = point_array.into_array_ref();
80 /// ```
81 #[must_use]
82 fn into_array_ref(self) -> ArrayRef;
83
84 /// Converts this array into an `Arc`ed [`arrow`][arrow_array] array.
85 ///
86 /// This is `O(1)`.
87 ///
88 /// Note that **this will omit any spatial extension information**. You must separately store
89 /// the spatial information in a [`Field`][arrow_schema::Field] derived from
90 /// [`Self::data_type`].
91 ///
92 /// # Examples
93 ///
94 /// ```
95 /// # use arrow_array::ArrayRef;
96 /// # use geoarrow_array::builder::PointBuilder;
97 /// # use geoarrow_array::GeoArrowArray;
98 /// # use geoarrow_schema::{Dimension, PointType};
99 /// #
100 /// let point = geo_types::point!(x: 1., y: 2.);
101 /// let point_type = PointType::new(Dimension::XY, Default::default());
102 /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
103 /// let array_ref: ArrayRef = point_array.to_array_ref();
104 /// ```
105 #[must_use]
106 fn to_array_ref(&self) -> ArrayRef;
107
108 /// The number of geometries contained in this array.
109 ///
110 /// # Examples
111 ///
112 /// ```
113 /// # use arrow_array::ArrayRef;
114 /// # use geoarrow_array::builder::PointBuilder;
115 /// # use geoarrow_array::GeoArrowArray;
116 /// # use geoarrow_schema::{Dimension, PointType};
117 /// #
118 /// let point = geo_types::point!(x: 1., y: 2.);
119 /// let point_type = PointType::new(Dimension::XY, Default::default());
120 /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
121 /// assert_eq!(point_array.len(), 1);
122 /// ```
123 fn len(&self) -> usize;
124
125 /// Returns `true` if the array is empty.
126 ///
127 /// # Examples
128 ///
129 /// ```
130 /// # use arrow_array::ArrayRef;
131 /// # use geoarrow_array::builder::PointBuilder;
132 /// # use geoarrow_array::GeoArrowArray;
133 /// # use geoarrow_schema::{Dimension, PointType};
134 /// #
135 /// let point = geo_types::point!(x: 1., y: 2.);
136 /// let point_type = PointType::new(Dimension::XY, Default::default());
137 /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish();
138 /// assert!(!point_array.is_empty());
139 /// ```
140 fn is_empty(&self) -> bool {
141 self.len() == 0
142 }
143
144 /// Returns a potentially computed [`NullBuffer``] that represents the logical null values of
145 /// this array, if any.
146 ///
147 /// Logical nulls represent the values that are null in the array, regardless of the underlying
148 /// physical arrow representation.
149 ///
150 /// For most array types, this is equivalent to the "physical" nulls returned by
151 /// [`Array::nulls`]. However it is different for union arrays, including our
152 /// [`GeometryArray`][crate::array::GeometryArray] and
153 /// [`GeometryCollectionArray`][crate::array::GeometryCollectionArray] types, because the
154 /// unions aren't encoded in a single null buffer.
155 fn logical_nulls(&self) -> Option<NullBuffer>;
156
157 /// Returns the number of null slots in this array.
158 ///
159 /// This is `O(1)` since the number of null elements is pre-computed.
160 ///
161 /// # Examples
162 ///
163 /// ```
164 /// # use geoarrow_array::GeoArrowArray;
165 /// # use geoarrow_array::builder::PointBuilder;
166 /// # use geoarrow_schema::{Dimension, PointType};
167 /// #
168 /// let point = geo_types::point!(x: 1., y: 2.);
169 /// let point_type = PointType::new(Dimension::XY, Default::default());
170 /// let point_array =
171 /// PointBuilder::from_nullable_points([Some(&point), None].into_iter(), point_type.clone()).finish();
172 /// assert_eq!(point_array.logical_null_count(), 1);
173 /// ```
174 fn logical_null_count(&self) -> usize;
175
176 /// Returns whether slot `i` is null.
177 ///
178 /// # Examples
179 ///
180 /// ```
181 /// # use geoarrow_array::GeoArrowArray;
182 /// # use geoarrow_array::builder::PointBuilder;
183 /// # use geoarrow_schema::{Dimension, PointType};
184 /// #
185 /// let point = geo_types::point!(x: 1., y: 2.);
186 ///
187 /// let point_type = PointType::new(Dimension::XY, Default::default());
188 /// let point_array =
189 /// PointBuilder::from_nullable_points([Some(&point), None].into_iter(), point_type.clone()).finish();
190 /// assert!(point_array.is_null(1));
191 /// ```
192 ///
193 /// # Panics
194 ///
195 /// Panics iff `i >= self.len()`.
196 fn is_null(&self, i: usize) -> bool;
197
198 /// Returns whether slot `i` is valid.
199 ///
200 /// # Examples
201 ///
202 /// ```
203 /// # use geoarrow_array::GeoArrowArray;
204 /// # use geoarrow_array::builder::PointBuilder;
205 /// # use geoarrow_schema::{Dimension, PointType};
206 /// #
207 /// let point = geo_types::point!(x: 1., y: 2.);
208 ///
209 /// let point_type = PointType::new(Dimension::XY, Default::default());
210 /// let point_array =
211 /// PointBuilder::from_nullable_points([Some(&point), None].into_iter(), point_type.clone()).finish();
212 /// assert!(point_array.is_valid(0));
213 /// ```
214 ///
215 /// # Panics
216 ///
217 /// Panics iff `i >= self.len()`.
218 #[inline]
219 fn is_valid(&self, i: usize) -> bool {
220 !self.is_null(i)
221 }
222
223 /// Returns a zero-copy slice of this array with the indicated offset and length.
224 ///
225 /// # Examples
226 ///
227 /// ```
228 /// # use std::sync::Arc;
229 /// #
230 /// # use geoarrow_array::GeoArrowArray;
231 /// # use geoarrow_array::builder::PointBuilder;
232 /// # use geoarrow_schema::{Dimension, PointType};
233 /// #
234 /// let point1 = geo_types::point!(x: 1., y: 2.);
235 /// let point2 = geo_types::point!(x: 3., y: 4.);
236 ///
237 /// let point_type = PointType::new(Dimension::XY, Default::default());
238 /// let point_array =
239 /// Arc::new(PointBuilder::from_points([point1, point2].iter(), point_type.clone()).finish())
240 /// as Arc<dyn GeoArrowArray>;
241 /// let sliced_array = point_array.slice(1, 1);
242 /// assert_eq!(sliced_array.len(), 1);
243 /// ```
244 ///
245 /// # Panics
246 ///
247 /// This function panics iff `offset + length > self.len()`.
248 #[must_use]
249 fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray>;
250
251 /// Change the [`Metadata`] of this array.
252 fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray>;
253}
254
255/// A trait for accessing the values of a [`GeoArrowArray`].
256///
257/// # Performance
258///
259/// Accessing a geometry from a "native" array, such as `PointArray`, `MultiPolygonArray` or
260/// `GeometryArray` will always be constant-time and zero-copy.
261///
262/// Accessing a geometry from a "serialized" array such as `GenericWkbArray` or `GenericWktArray`
263/// will trigger some amount of parsing. In the case of `GenericWkbArray`, accessing an item will
264/// read the WKB header and scan the buffer if needed to find internal geometry offsets, but will
265/// not copy any internal coordinates. This allows for later access to be constant-time (though not
266/// necessarily zero-copy, since WKB is not byte-aligned). In the case of `GenericWktArray`,
267/// accessing a geometry will fully parse the WKT string and copy coordinates to a separate
268/// representation. This means that calling `.iter()` on a `GenericWktArray` will transparently
269/// fully parse every row.
270///
271/// # Validity
272///
273/// A [`GeoArrowArrayAccessor`] must always return a well-defined value for an index that is
274/// within the bounds `0..Array::len`, including for null indexes where [`Array::is_null`] is true.
275///
276/// The value at null indexes is unspecified, and implementations must not rely on a specific
277/// value such as [`Default::default`] being returned, however, it must not be undefined.
278pub trait GeoArrowArrayAccessor<'a>: GeoArrowArray {
279 /// The [geoarrow scalar object][crate::scalar] for this geometry array type.
280 type Item: Send + Sync + GeometryTrait<T = f64>;
281
282 /// Returns the element at index `i`, not considering validity.
283 ///
284 /// # Examples
285 ///
286 /// ```
287 /// use geo_traits::{CoordTrait, PointTrait};
288 /// # use geoarrow_array::GeoArrowArrayAccessor;
289 /// # use geoarrow_array::builder::PointBuilder;
290 /// # use geoarrow_schema::{Dimension, PointType};
291 ///
292 /// let point1 = geo_types::point!(x: 1., y: 2.);
293 ///
294 /// let point_type = PointType::new(Dimension::XY, Default::default());
295 /// let point_array =
296 /// PointBuilder::from_nullable_points([Some(&point1), None].into_iter(), point_type.clone())
297 /// .finish();
298 ///
299 /// let coord = point_array.value(0).unwrap().coord().unwrap();
300 /// assert_eq!(coord.x(), 1.);
301 /// assert_eq!(coord.y(), 2.);
302 /// ```
303 ///
304 /// # Errors
305 ///
306 /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
307 ///
308 /// # Panics
309 ///
310 /// Panics if the value is outside the bounds of the array.
311 fn value(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
312 assert!(index <= self.len());
313 unsafe { self.value_unchecked(index) }
314 }
315
316 /// Returns the element at index `i`, not considering validity.
317 ///
318 /// # Examples
319 ///
320 /// ```
321 /// use geo_traits::{CoordTrait, PointTrait};
322 /// # use geoarrow_array::GeoArrowArrayAccessor;
323 /// # use geoarrow_array::builder::PointBuilder;
324 /// # use geoarrow_schema::{Dimension, PointType};
325 ///
326 /// let point1 = geo_types::point!(x: 1., y: 2.);
327 ///
328 /// let point_type = PointType::new(Dimension::XY, Default::default());
329 /// let point_array =
330 /// PointBuilder::from_nullable_points([Some(&point1), None].into_iter(), point_type.clone())
331 /// .finish();
332 ///
333 /// let coord = unsafe { point_array.value_unchecked(0) }
334 /// .unwrap()
335 /// .coord()
336 /// .unwrap();
337 /// assert_eq!(coord.x(), 1.);
338 /// assert_eq!(coord.y(), 2.);
339 /// ```
340 ///
341 /// # Errors
342 ///
343 /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
344 ///
345 /// # Safety
346 ///
347 /// Caller is responsible for ensuring that the index is within the bounds of the array
348 unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item>;
349
350 /// Returns the value at slot `i` as an Arrow scalar, considering validity.
351 ///
352 /// # Examples
353 ///
354 /// ```
355 /// # use geoarrow_array::GeoArrowArrayAccessor;
356 /// # use geoarrow_array::builder::PointBuilder;
357 /// # use geoarrow_schema::{Dimension, PointType};
358 /// #
359 /// let point1 = geo_types::point!(x: 1., y: 2.);
360 ///
361 /// let point_type = PointType::new(Dimension::XY, Default::default());
362 /// let point_array =
363 /// PointBuilder::from_nullable_points([Some(&point1), None].into_iter(), point_type.clone())
364 /// .finish();
365 ///
366 /// assert!(point_array.get(0).unwrap().is_some());
367 /// assert!(point_array.get(1).unwrap().is_none());
368 /// ```
369 ///
370 /// # Errors
371 ///
372 /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
373 fn get(&'a self, index: usize) -> GeoArrowResult<Option<Self::Item>> {
374 if self.is_null(index) {
375 return Ok(None);
376 }
377
378 Ok(Some(self.value(index)?))
379 }
380
381 /// Returns the value at slot `i` as an Arrow scalar, considering validity.
382 ///
383 /// # Errors
384 ///
385 /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
386 ///
387 /// # Safety
388 ///
389 /// Caller is responsible for ensuring that the index is within the bounds of the array
390 unsafe fn get_unchecked(&'a self, index: usize) -> Option<GeoArrowResult<Self::Item>> {
391 if self.is_null(index) {
392 return None;
393 }
394
395 Some(unsafe { self.value_unchecked(index) })
396 }
397
398 /// Iterates over this array's geoarrow scalar values, considering validity.
399 ///
400 /// # Errors
401 ///
402 /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
403 fn iter(&'a self) -> impl ExactSizeIterator<Item = Option<GeoArrowResult<Self::Item>>> + 'a {
404 (0..self.len()).map(|i| unsafe { self.get_unchecked(i) })
405 }
406
407 /// Iterator over geoarrow scalar values, not considering validity.
408 ///
409 /// # Errors
410 ///
411 /// Errors for invalid WKT and WKB geometries. Will never error for native arrays.
412 fn iter_values(&'a self) -> impl ExactSizeIterator<Item = GeoArrowResult<Self::Item>> + 'a {
413 (0..self.len()).map(|i| unsafe { self.value_unchecked(i) })
414 }
415}
416
417/// A trait describing a mutable geometry array; i.e. an array whose values can be changed.
418///
419// Note: This trait is not yet publicly exported from this crate, as we're not sure how the API
420// should be, and in particular whether we need this trait to be dyn-compatible or not.
421pub(crate) trait GeoArrowArrayBuilder: Debug + Send + Sync {
422 /// Returns the length of the array.
423 fn len(&self) -> usize;
424
425 /// Returns whether the array is empty.
426 fn is_empty(&self) -> bool {
427 self.len() == 0
428 }
429
430 /// Push a null value to this builder.
431 fn push_null(&mut self);
432
433 /// Push a geometry to this builder.
434 #[allow(dead_code)]
435 fn push_geometry(
436 &mut self,
437 geometry: Option<&impl GeometryTrait<T = f64>>,
438 ) -> GeoArrowResult<()>;
439
440 /// Finish the builder and return an [`Arc`] to the resulting array.
441 #[allow(dead_code)]
442 fn finish(self) -> Arc<dyn GeoArrowArray>;
443}
444
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::Array;
    use arrow_array::builder::{ArrayBuilder, FixedSizeListBuilder, Float64Builder, StructBuilder};
    use arrow_schema::{DataType, Field};
    use geoarrow_schema::{CoordType, Dimension, GeometryType, PointType};

    use super::*;
    use crate::builder::GeometryBuilder;
    use crate::trait_::GeoArrowArray;

    /// A fixed-size list of 2, 3 or 4 `Float64` values is inferred as an interleaved point type
    /// of the matching dimension.
    #[test]
    fn infer_type_interleaved_point() {
        for (list_size, dim) in [
            (2, Dimension::XY),
            (3, Dimension::XYZ),
            (4, Dimension::XYZM),
        ] {
            let mut list_builder = FixedSizeListBuilder::new(Float64Builder::new(), list_size);
            let array = list_builder.finish();
            let field = Field::new("", array.data_type().clone(), true);

            let inferred = GeoArrowType::try_from(&field).unwrap();
            let expected = GeoArrowType::Point(
                PointType::new(dim, Default::default()).with_coord_type(CoordType::Interleaved),
            );
            assert_eq!(inferred, expected);
        }
    }

    /// A struct of `Float64` children named `x`, `y`, (`z`, (`m`)) is inferred as a separated
    /// point type of the matching dimension.
    #[test]
    fn infer_type_separated_point() {
        // Each character of the first tuple element names one struct child.
        let cases = [
            ("xy", Dimension::XY),
            ("xyz", Dimension::XYZ),
            ("xyzm", Dimension::XYZM),
        ];
        for (axes, dim) in cases {
            let fields: Vec<_> = axes
                .chars()
                .map(|axis| Arc::new(Field::new(axis.to_string(), DataType::Float64, true)))
                .collect();
            let builders: Vec<_> = axes
                .chars()
                .map(|_| Box::new(Float64Builder::new()) as Box<dyn ArrayBuilder>)
                .collect();

            let mut struct_builder = StructBuilder::new(fields, builders);
            let array = struct_builder.finish();
            let field = Field::new("", array.data_type().clone(), true);

            let inferred = GeoArrowType::try_from(&field).unwrap();
            let expected = GeoArrowType::Point(
                PointType::new(dim, Default::default()).with_coord_type(CoordType::Separated),
            );
            assert_eq!(inferred, expected);
        }
    }

    /// Converting an array's type to a `Field` and parsing it back yields the array's
    /// `GeoArrowType` again, for point, multilinestring and mixed geometry arrays.
    #[test]
    fn native_type_round_trip() {
        // Point array.
        let points = crate::test::point::point_array(CoordType::Interleaved);
        let point_field = points.data_type.to_field("geometry", true);
        let parsed = GeoArrowType::try_from(&point_field).unwrap();
        assert_eq!(points.data_type(), parsed);

        // MultiLineString array.
        let multi_lines = crate::test::multilinestring::ml_array(CoordType::Interleaved);
        let ml_field = multi_lines.data_type.to_field("geometry", true);
        let parsed = GeoArrowType::try_from(&ml_field).unwrap();
        assert_eq!(multi_lines.data_type(), parsed);

        // Mixed geometry array holding three points followed by two multilinestrings.
        let geometry_type =
            GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved);
        let mut builder = GeometryBuilder::new(geometry_type);
        builder.push_geometry(Some(&crate::test::point::p0())).unwrap();
        builder.push_geometry(Some(&crate::test::point::p1())).unwrap();
        builder.push_geometry(Some(&crate::test::point::p2())).unwrap();
        builder
            .push_geometry(Some(&crate::test::multilinestring::ml0()))
            .unwrap();
        builder
            .push_geometry(Some(&crate::test::multilinestring::ml1()))
            .unwrap();
        let geometries = builder.finish();
        let geometry_field = geometries.data_type.to_field("geometry", true);
        let parsed = GeoArrowType::try_from(&geometry_field).unwrap();
        assert_eq!(geometries.data_type(), parsed);
    }
}
569}